1. Study design

2. Proposed Method

2.1 Figure S2.1.

rm(list=ls())
# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(openxlsx)
library(tibble)
library(extrafont)

# Load fonts
loadfonts(device = "win")

# Read data from Excel
Oral <- read.xlsx("Oral Neutrophil CD-Markers.xlsx", colNames = TRUE)

# Reshape Oral data to long format, excluding average columns
Oral_long <- Oral %>%
  filter(`Oral.Neutrophil.CD-Markers` == "Hydrophilicity percentage") %>%
  select(-`Oral.Neutrophil.CD-Markers`, -`average-oPMN`) %>%
  pivot_longer(cols = everything(), names_to = "Marker", values_to = "Value") %>%
  mutate(Remainder = 100 - Value)



# Remove "CD" from Marker labels
Oral_long$Marker <- gsub("CD", "", Oral_long$Marker)


# Convert Marker to a factor and order levels
Oral_long$Marker <- factor(Oral_long$Marker, levels = Oral_long %>%
                             group_by(Marker) %>%
                             summarize(mean_value = mean(Value)) %>%
                             arrange(as.numeric(Marker)) %>%
                             pull(Marker))



# Create a combined data frame for plotting
Oral_combined <- Oral_long %>%
  pivot_longer(cols = c(Value, Remainder), names_to = "Type", values_to = "Percentage")



# Function to create the stacked bar plot with black borders for the white part, and remove "Remainder" from the legend
create_plot <- function(data, title, actual_label) {
  ggplot(data, aes(x = Marker, y = Percentage, fill = Type)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    # Correct the scale_fill_manual to ensure that "Hydrophilicity(Oral)" appears navyblue in the legend
    scale_fill_manual(
      values = c("Value" = "navyblue", "Remainder" = "white"),
      labels = c(actual_label),  # Remove the "Remainder" from the legend
      breaks = c("Value")  # Only keep "Value" in the legend
    )+
    labs(
      x = "CD",
      y = "Hydrophilicity(%)") +
    theme_minimal() +
    theme(
      axis.text = element_text(size = 12, family = "Times New Roman"),
      axis.title = element_text(size = 12, family = "Times New Roman"),
      legend.text = element_text(size = 12, family = "Times New Roman"),
      legend.title = element_blank(),
      legend.position = "top",
      panel.background = element_blank(),
      plot.background = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA),
    ) +
    scale_x_discrete(breaks = levels(data$Marker)) +
    ylim(0, 100)
}

# Plot the Oral data with only Hydrophilicity(Oral) legend
p_oral <- create_plot(Oral_combined, "Oral Neutrophil CD-Markers", "Hydrophilicity (Oral)")



# Display the plots
print(p_oral)

2.2 Figure S2.2.

rm(list=ls())
# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(openxlsx)
library(tibble)
library(extrafont)

# Load fonts
loadfonts(device = "win")


Epithelial <- read.xlsx("Epithelial CD-Markers.xlsx", colNames = TRUE)


# Reshape Epithelial data to long format, excluding average columns
Epithelial_long <- Epithelial %>%
  filter(`Epithelial.CD-Markers` == "Hydrophilicity percentage") %>%
  select(-`Epithelial.CD-Markers`, -`Average-Epitheal`) %>%
  pivot_longer(cols = everything(), names_to = "Marker", values_to = "Value") %>%
  mutate(Remainder = 100 - Value)


Epithelial_long$Marker <- gsub("CD", "", Epithelial_long$Marker)


Epithelial_long$Marker <- factor(Epithelial_long$Marker, levels = Epithelial_long %>%
                                   group_by(Marker) %>%
                                   summarize(mean_value = mean(Value)) %>%
                                   arrange(as.numeric(Marker)) %>%
                                   pull(Marker))


Epithelial_combined <- Epithelial_long %>%
  pivot_longer(cols = c(Value, Remainder), names_to = "Type", values_to = "Percentage")

# Function to create the stacked bar plot with black borders for the white part, and remove "Remainder" from the legend
create_plot <- function(data, title, actual_label) {
  ggplot(data, aes(x = Marker, y = Percentage, fill = Type)) +
    geom_bar(stat = "identity", position = "stack", color = "black") +
    # Correct the scale_fill_manual to ensure that "Hydrophilicity(Oral)" appears navyblue in the legend
    scale_fill_manual(
      values = c("Value" = "navyblue", "Remainder" = "white"),
      labels = c(actual_label),  # Remove the "Remainder" from the legend
      breaks = c("Value")  # Only keep "Value" in the legend
    )+
    labs(
      x = "CD",
      y = "Hydrophilicity(%)") +
    theme_minimal() +
    theme(
      axis.text = element_text(size = 12, family = "Times New Roman"),
      axis.title = element_text(size = 12, family = "Times New Roman"),
      legend.text = element_text(size = 12, family = "Times New Roman"),
      legend.title = element_blank(),
      legend.position = "top",
      panel.background = element_blank(),
      plot.background = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA),
    ) +
    scale_x_discrete(breaks = levels(data$Marker)) +
    ylim(0, 100)
}


# Plot the Epithelial data with only Hydrophilicity(Epithelial) legend
p_epithelial <- create_plot(Epithelial_combined, "Epithelial CD-Markers", "Hydrophilicity (Epithelial)")
print(p_epithelial)

2.3 Figure 2a

# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(openxlsx)
library(tibble)
library(extrafont)

# Load fonts
loadfonts(device = "win")

# Read data from Excel
Epithelial <- read.xlsx("Epithelial CD-Markers.xlsx", colNames = TRUE)
Oral <- read.xlsx("Oral Neutrophil CD-Markers.xlsx", colNames = TRUE)

# Prepare Epithelial data
Epithelial <- Epithelial %>%
  mutate(Dataset = "Epithelial") %>%
  select(-contains("Average-Epitheal"))

Epithelial_long <- Epithelial %>%
  pivot_longer(cols = starts_with("CD"),  
               names_to = "Marker",       
               values_to = "Value")       

# Prepare Oral data
Oral <- Oral %>%
  mutate(Dataset = "Oral") %>%
  select(-contains("average-oPMN"))

Oral_long <- Oral %>%
  pivot_longer(cols = starts_with("CD"),  
               names_to = "Marker",       
               values_to = "Value")       

# Combine both datasets
combined_data <- bind_rows(Epithelial_long, Oral_long)
# Convert Marker column to ordered factor based on numeric order
combined_data <- combined_data %>%
  mutate(Marker = factor(Marker, levels = sort(unique(Marker), decreasing = FALSE)))

# Now the markers are sorted from small to large in both datasets in the legend

# Create a unique list of markers for consistent color mapping
unique_markers <- unique(combined_data$Marker)

# Define a consistent color palette for the unique markers
combined_palette <- colorRampPalette(c("navyblue", "lightblue", "#B8860B", "gold"))(length(unique_markers))

# Apply the colors to the markers by making sure the order is consistent
marker_colors <- setNames(combined_palette, unique_markers)

# Plot the combined data
ggplot(combined_data, aes(x = Dataset, y = Value)) +
  geom_boxplot(outlier.shape = NA, color = "black", fill = NA) +  # No fill for the boxplot
  geom_jitter(aes(color = Marker), size = 3, width = 0.2) +
  scale_color_manual(values = marker_colors) +  # Apply the custom color palette for discrete markers
  labs(y = "Hydrophilicity Percentage", x = "Dataset") +
  theme_minimal() +
  theme(axis.text = element_text(size = 12, family = "Times New Roman"),
        axis.title.y = element_text(size = 12, family = "Times New Roman"),
        axis.title.x =  element_blank(),
        legend.text = element_text(size = 12, family = "Times New Roman"),
        legend.position = c(0.25, 0.73),  # Position legend in bottom right inside plot
        legend.direction = "vertical",  # Arrange legend items vertically
        legend.spacing.y = unit(0.02, 'cm'),  # Further reduce space between legend lines
        legend.key.size = unit(0.8, 'lines'),  # Reduce the size of the legend key
        legend.title = element_blank(),
        panel.background = element_blank(),
        plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(color = "black", fill = NA),
        aspect.ratio = 1.2/1)

3. Charactrization and Control

3.1 Figure S3.1.

rm(list=ls())
# Load necessary libraries
library(ggplot2)
library(extrafont)
#font_import()
loadfonts(device = "win")
#fonts()
# Create the data frame
data <- data.frame(
  Group = factor(c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
  Before = c(32.3, 32.1, 29.8, 28.1, 30.8, 31.2, 27, 31.6, 28.8, 32.1),
  After = c(21.4, 20.2, 19.1, 14.7, 19.7, 21.4, 15, 23, 16.9, 19.4),
  B_STDEV = c(3.380083, 3.453579, 3.587552, 3.114345, 2.920873, 2.976983, 3.343804, 2.776137, 2.619842, 2.599785),
  A_STDEV = c(4.829832, 4.626718, 4.445078, 3.8666, 3.920148, 3.149643, 3.515564, 3.497314, 3.629574, 2.622988)
)
data$Group <- factor(data$Group, levels = as.character(1:10))
# Melt the data frame for ggplot2
library(reshape2)
data_melted <- melt(data, id.vars = "Group", measure.vars = c("Before", "After"), variable.name = "Condition", value.name = "Value")

# Add the standard deviations to the melted data frame
data_melted$STDEV <- ifelse(data_melted$Condition == "Before", data$B_STDEV, data$A_STDEV)

# Plot the data
p = ggplot(data_melted, aes(x = Group, y = Value, fill = Condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = Value - STDEV, ymax = Value + STDEV), position = position_dodge(0.9), width = 0.25) +
  labs(x = "Groups", y = "Number of OPMNs Per Image") +
  theme_minimal() +
  scale_fill_manual(values = c("Before" = "#377eb8", "After" = "#c7d9f0"),guide = guide_legend(title = NULL))+
  theme(
    axis.text= element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = "top", # Position the legend at the top
    legend.direction = "horizontal", # Arrange legend items horizontally
    legend.title = element_text(hjust = 0.5, size = 12, family = "Times New Roman"), # Center-align the legend title
   panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank(), # Remove minor grid lines
    panel.border = element_rect(color = "black", fill = NA)
  )


p

3.2 Figure S3.2.

rm(list=ls())
library(ggplot2)
library(extrafont)
#font_import()
loadfonts(device = "win")
#fonts()
# Create the data frame
data <- data.frame(
  Group = factor(c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")),
  Before = c(28.7, 30.7, 29.5, 33.2, 28.6, 31.5, 32.2, 29.4, 31.9, 30.5),
  After = c(22.4, 26.8, 25.1, 29.6, 23.2, 27.7, 28.1, 22.5, 28, 27.3),
  B_STDEV = c(3.40098, 4.547282, 4.006938, 4.184628, 4.971027, 2.915476, 3.119829, 3.627059, 2.806738, 2.838231),
  A_STDEV = c(5.337498, 6.069962, 7.109462, 5.660781, 5.95912, 7.394442, 4.532598, 5.147815, 3.399346, 5.292552)
)
data$Group <- factor(data$Group, levels = as.character(1:10))

# Melt the data frame for ggplot2
library(reshape2)
data_melted <- melt(data, id.vars = "Group", measure.vars = c("Before", "After"), variable.name = "Condition", value.name = "Value")

# Add the standard deviations to the melted data frame
data_melted$STDEV <- ifelse(data_melted$Condition == "Before", data$B_STDEV, data$A_STDEV)

# Plot the data
p= ggplot(data_melted, aes(x = Group, y = Value, fill = Condition)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = Value - STDEV, ymax = Value + STDEV), position = position_dodge(0.9), width = 0.25) +
  labs(x = "Groups", y = "Number of OPMNs Per Image") +
  theme_minimal() +
  scale_fill_manual(values = c("Before" = "#4daf4a", "After" = "#c5e0c5"),guide = guide_legend(title = NULL))+
theme(
  axis.text= element_text(size = 12, family = "Times New Roman"),
  axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
  legend.text = element_text(size = 12, family = "Times New Roman"),
  legend.position = "top", # Position the legend at the top
  legend.direction = "horizontal", # Arrange legend items horizontally
  legend.title = element_text(hjust = 0.5, size = 12, family = "Times New Roman"),     panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank(), # Remove minor grid lines
    panel.border = element_rect(color = "black", fill = NA)
)

p

3.3 Figure S3.3.

library(ggplot2)
library(dplyr)
library(openxlsx)
library(RColorBrewer)
library(scales)
library(car)
library(ggpubr)
library(extrafont)
library(grid)
library(gridExtra)
library(gtable)

# Load fonts
loadfonts(device = "win")

# Read the data
without_cornstrach_yas <- read.xlsx("without_cornstrach_yas.xlsx", colNames = TRUE)
with_cornstarch_yas <- read.xlsx("with_cornstarch_yas.xlsx", colNames = TRUE)


# Prepare the data for plotting
# For Without Cornstarch
before_values_without <- as.numeric(without_cornstrach_yas[1, -1]) * 2.18 * 10000
after_values_without <- as.numeric(without_cornstrach_yas[2, -1]) * 2.18 * 10000
sd_before_without <- mean(as.numeric(without_cornstrach_yas[3, -1]) * 2.18 * 10000)
sd_after_without <- mean(as.numeric(without_cornstrach_yas[4, -1]) * 2.18 * 10000)

# For With Cornstarch
before_values_with <- as.numeric(with_cornstarch_yas[1, -1]) * 2.18 * 10000
after_values_with <- as.numeric(with_cornstarch_yas[2, -1]) * 2.18 * 10000
sd_before_with <- mean(as.numeric(with_cornstarch_yas[3, -1]) * 2.18 * 10000)
sd_after_with <- mean(as.numeric(with_cornstarch_yas[4, -1]) * 2.18 * 10000)



# Create the plot data frame
plot_data <- data.frame(
  Group = rep(c("A", "B", "C", "D"), each = length(before_values_without)),
  Values = c(before_values_without, after_values_without, before_values_with, after_values_with)
)

# Set the factor levels
plot_data$Group <- factor(plot_data$Group, levels = c("A", "B", "C", "D", "E", "F"))

# Define custom colors
custom_colors <- c("A" = "#377eb8", "B" = "#c7d9f0", "C" = "#4daf4a", "D" = "#c5e0c5")

# Create the box plot and add p-values
p <- ggplot(plot_data, aes(x = Group, y = Values, fill = Group)) +
  geom_jitter(aes(color = NULL),
              position = position_jitter(0.2), alpha = 0.5) +
  geom_boxplot() +
  stat_summary(aes(color = "Mean"), fun = mean, geom = "point", shape = 4, size = 3) +
  stat_summary(aes(color = "Max"), fun = max, geom = "point", shape = 10, size = 3) +
  stat_summary(aes(color = "Min"), fun = min, geom = "point", shape = 2, size = 3) +
  scale_fill_manual(values = custom_colors) +
  scale_color_manual(name = "Statistics", values = c("Mean" = "red", "Max" = "blue", "Min" = "black")) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Set major grid lines to gray
    panel.grid.minor = element_blank(), # Set minor grid lines to light gray
    panel.border = element_rect(color = "black", fill = NA)
  ) +
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) +
  labs(
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")")))
  ) +
  guides(fill = "none")

# Adding p-values for specified comparisons
comparisons <- list(
  c("A", "B"),
  c("C", "D"),
  c("A", "C"),
  c("B", "D")
)

p <- p + stat_compare_means(comparisons = comparisons, method = "t.test", method.args = list(var.equal = FALSE), size = 3.5, family = "Times New Roman")

# Define the legend texts and colors
legend_texts <- c("A: Without Cornstarch, Before Wash",
                  "C: With Cornstarch, Before Wash",
                  "B: Without Cornstarch, After Wash",
                  "D: With Cornstarch, After Wash"
                  )

legend_colors <- c("#377eb8", "#4daf4a","#c7d9f0","#c5e0c5")

# Define the legend font colors
legend_font_colors <- c("#000000", "#000000","#000000","#000000")


# Function to create a grob for each legend item with specified font color
create_legend_grob <- function(text, fill_color, font_color) {
  gTree(children = gList(
    rectGrob(gp = gpar(fill = fill_color, col = NA)),
    textGrob(text, gp = gpar(fontface = "bold", col = font_color, fontsize = 8), x = 0.5, y = 0.5, just = "center")
  ))
}

# Create the legend grobs with specified font colors
legend_grobs <- mapply(create_legend_grob, legend_texts, legend_colors, legend_font_colors, SIMPLIFY = FALSE)

# Create a matrix of legend grobs
legend_matrix <- matrix(legend_grobs, nrow = 2, byrow = TRUE)

# Convert the list of grobs to a gtable
legend_gtable <- gtable_matrix("legend_table", grobs = legend_matrix, 
                               widths = unit(rep(1, 2), "null"), 
                               heights = unit(rep(0.5, 2), "null"))

# Draw the legend table
#grid.newpage()
#grid.draw(legend_gtable)

# Combine the legend and the main plot
combined_plot <- arrangeGrob(legend_gtable, p, nrow = 2, heights = c(1, 4)) # Adjust heights as needed

# Display the combined plot
grid.newpage()
grid.draw(combined_plot)

# Save the combined plot
ggsave("combined_plot_updated.png", plot = combined_plot, width = 14, height = 10, dpi = 300)

3.4 Figure S3.4.

rm(list=ls())
# Load necessary libraries
library(ggplot2)
library(reshape2)
library(extrafont)
#font_import()
loadfonts(device = "win")
# Create the data frame
data <- data.frame(
  Condition = factor(c("Standard Filter", "0.2mg(CS)/ml", "0.4mg(CS)/ml", "0.6mg(CS)/ml", "0.8mg(CS)/ml", "1 mg(CS)/ml"),
                     levels = c("Standard Filter", "0.2mg(CS)/ml", "0.4mg(CS)/ml", "0.6mg(CS)/ml", "0.8mg(CS)/ml", "1 mg(CS)/ml")),
  oPMNs = c(33.71, 20.15, 24.05, 33.5, 33.52, 33.55),
  Epithelial = c(0, 1.12, 1.83, 2.1, 4.32, 5.17),
  Debri = c(0.8, 2.1, 2.3, 2.54, 5.6, 6.18),
  O_SD = c(6.63009, 5.03958, 6.692297, 5.947136, 7.19649, 6.8133),
  E_SD = c(0, 1.67332, 1.297771, 1.68333, 1.7252, 2.564433),
  D_SD = c(0.606977, 1.486784, 1.76516, 1.431782, 1.49032, 2.058998)
)

# Remove the "Standard Filter" row
data <- data %>% filter(Condition != 'Standard Filter')

# Update the Condition levels to remove "mg(CS)/ml"
data$Condition <- factor(data$Condition, levels = c("0.2mg(CS)/ml", "0.4mg(CS)/ml", "0.6mg(CS)/ml", "0.8mg(CS)/ml", "1 mg(CS)/ml"),
                         labels = c("0.2", "0.4", "0.6", "0.8", "1.0"))

# Melt the data frame for ggplot2
data_melted <- melt(data, id.vars = "Condition", measure.vars = c("oPMNs", "Epithelial", "Debri"), variable.name = "Type", value.name = "Value")

# Add the standard deviations to the melted data frame
data_melted$SD <- ifelse(data_melted$Type == "oPMNs", data$O_SD, ifelse(data_melted$Type == "Epithelial", data$E_SD, data$D_SD))

# Plot the data
p <- ggplot(data_melted, aes(x = Condition, y = Value, fill = Type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = Value - SD, ymax = Value + SD), position = position_dodge(0.9), width = 0.25) +
  labs(x = "Concentration (mg/ml)", y = "Number of oPMNs Per Image") +
  theme_minimal() +
  scale_fill_manual(values = c("oPMNs" = "blue", "Epithelial" = "orange", "Debri" = "red"))+
   theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA)
  )

p

3.5 Figure S3.5.

library(ggplot2)
library(dplyr)
library(tidyr)
library(extrafont)
#font_import()
loadfonts(device = "win")
# Data from the provided table
data <- data.frame(
  Time = c('Standard Filter', '5min', '10min', '15min', '20min', '25min'),
  oPMNs = c(34.51, 21.1, 25.78, 34.33, 34.33, 34.33),
  Epithelial = c(0, 2.01, 2.55, 2.7, 6.2, 7.51),
  Debri = c(2.05, 2.08, 2.25, 2.58, 4.83, 5.66),
  O_SD = c(8.07, 6.82, 11.78, 6.71, 13.58, 6.59),
  E_SD = c(0, 1.69, 1.38, 1.97, 2.99, 3.09),
  D_SD = c(0.79, 1.71, 1.87, 1.9, 1.96, 2.06)
)

# Remove the "Standard Filter" row
data <- data %>% filter(Time != 'Standard Filter')

# Set the levels for the Time factor to order the x-axis
data$Time <- factor(data$Time, levels = c('5min', '10min', '15min', '20min', '25min'))

# Reshape data for ggplot
data_long <- data %>%
  pivot_longer(cols = c(oPMNs, Epithelial, Debri),
               names_to = "Type",
               values_to = "Value") %>%
  mutate(SD = case_when(
    Type == "oPMNs" ~ O_SD,
    Type == "Epithelial" ~ E_SD,
    Type == "Debri" ~ D_SD
  ))

# Define the colors to match the original plot
colors <- c("oPMNs" = "blue", "Epithelial" = "orange", "Debri" = "red")

# Set the order of the Type factor
data_long$Type <- factor(data_long$Type, levels = c("oPMNs", "Epithelial", "Debri"))

# Plot using ggplot2
p <- ggplot(data_long, aes(x = Time, y = Value, fill = Type)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(aes(ymin = Value - SD, ymax = Value + SD),
                width = 0.2,
                position = position_dodge(0.9)) +
  scale_fill_manual(values = colors) +
  scale_x_discrete(labels = c("5", "10", "15", "20", "25")) +
  labs(x = "Time (min)", y = "Number of oPMNs Per Image", fill = "Type") +
  theme_minimal() +
   theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA)
  )

p

3.6 Figure S3.6.

rm(list=ls())
library(ggplot2)
library(reshape2)

# Create the data frame
data <- data.frame(
  Condition = rep(c("5", "10", "15", "20", "25"), each = 2),
  Wash = rep(c("Before wash", "After wash"), times = 5),
  Value = c(37.8, 29.8, 40.42, 38.6, 38.1, 31.6, 42.93, 28.8, 42.93, 20.13),
  SD = c(8.421137426, 4.430199394, 9.11994152, 6.609757098, 8.973046058, 2.649947589, 4.891716354, 4.06939799, 6.577402392, 3.964284999)
)

# Convert to long format for ggplot2
data_melted <- melt(data, id.vars = c("Condition", "Wash"), measure.vars = "Value", variable.name = "Type", value.name = "Value")

# Add the standard deviations to the melted data frame
data_melted$SD <- rep(data$SD, each = 1)

# Ensure the correct order of the levels for the fill and x-axis aesthetics
data_melted$Wash <- factor(data_melted$Wash, levels = c("Before wash", "After wash"))
data_melted$Condition <- factor(data_melted$Condition, levels = c("5", "10", "15", "20", "25"))

# Plot the data
p <- ggplot(data_melted, aes(x = Condition, y = Value, fill = Wash)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = Value - SD, ymax = Value + SD), position = position_dodge(0.9), width = 0.25) +
  labs(x = "PBS Volume (mL)", y = "Number of oPMNs Per Image") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman", margin = margin(r = 10)),
    legend.position = "top", # Position the legend at the top
    legend.direction = "horizontal", # Arrange legend items horizontally
    legend.title = element_blank(),
    panel.background = element_blank(), # Set the background color to white
    plot.background = element_blank(), # Set the plot background color to white
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank(),
      panel.border = element_rect(color = "black", fill = NA)
  ) +
  scale_fill_manual(values = c("Before wash" = "blue", "After wash" = "#00BA38"),
                    labels = c("Before", "After"))

p

3.7 Figure S3.7.

# Load necessary libraries
library(ggplot2)
library(dplyr)

# Create the updated data frame
data <- data.frame(
  PBS.concentration = c('100', '100', '66', '66', '33', '33', '0', '0', # First 4 rows for Before and After
                        '100', '100', '66', '66', '33', '33', '0', '0', # Second 4 rows for Before and After
                        '100', '100', '66', '66', '33', '33', '0', '0', # Third 4 rows for Before and After
                        '100', '100', '66', '66', '33', '33', '0', '0'), # Fourth 4 rows for Before and After
  Condition = rep(c('Before', 'After'), times = 16), # Alternating Before and After
  Average = c(29, 18, 22.54545455, 7.045454545, 27.55555556, 3.444444444, 26.23076923, 2.980769231,
              27, 22, 25.36363636, 9.863636364, 22.38888889, 6.888888889, 30.40384615, 0.596153846,
              30, 16, 26.77272727, 14.09090909, 29.27777778, 3.444444444, 28.01923077, 2.980769231,
              27, 17, 22.54545455, 4.227272727, 25.83333333, 10.33333333, 33.38461538, 2.980769231),
  Standard_deviation = c(2, 3, 2.5, 3.5, 2, 3, 2.5, 3,
                         2.5, 3.5, 2, 3, 2.5, 3.5, 2, 3,
                         2, 3.5, 2.5, 3.5, 2, 3, 2.5, 3,
                         2.5, 3.5, 2, 3.5, 2.5, 3.5, 2, 3)
)

# Ensure the PBS concentration is treated as a factor and in the correct order
data$PBS.concentration <- factor(data$PBS.concentration, levels = c('0', '33', '66', '100'), ordered = TRUE)
data$Condition <- factor(data$Condition, levels = c("Before", "After"))

# Group by PBS concentration and Condition, then calculate the mean of the standard deviation
data_summary <- data %>%
  group_by(PBS.concentration, Condition) %>%
  summarise(Average = mean(Average), Mean_SD = mean(Standard_deviation))

# Create the plot with dodged bars and mean error bars
p <- ggplot(data_summary, aes(x = PBS.concentration, y = Average, fill = Condition)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), width = 0.8) +  # Use dodged position
  geom_errorbar(aes(ymin = Average - Mean_SD, ymax = Average + Mean_SD),
                position = position_dodge(0.9), width = 0.25) +  # Add error bars using mean SD
  scale_fill_manual(values = c("Before" = "blue", "After" = "#00BA38")) +
  labs(x = "PBS Concentration (%)", y = "Number of oPMNs Per Image", fill = "Condition") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman", face = "bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA)
  )

# Print the plot
print(p)

3.8 Figure S3.8.

library(ggplot2)
library(reshape2)
library(extrafont)
# font_import()
loadfonts(device = "win")
##fonts()

# Create the data frame
data <- data.frame(
  Test = factor(c("T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9", "T10",
                  "T11", "T12", "T13", "T14", "T15", "T16", "T17", "T18", "T19", "T20",
                  "T21", "T22", "T23", "T24", "T25", "T26", "T27", "T28", "T29", "T30"),
                levels = c("T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9", "T10",
                           "T11", "T12", "T13", "T14", "T15", "T16", "T17", "T18", "T19", "T20",
                           "T21", "T22", "T23", "T24", "T25", "T26", "T27", "T28", "T29", "T30")),
  Biosa = c(30.3, 30.05, 33.4, 30.1, 33.25, 30.8, 30.4, 32.65, 31.1, 30, 32.45, 28.45, 32.95, 30.5, 30.2, 32.7, 33.1, 33.4, 31.25, 33.3, 33.15, 31.1, 30.15, 32.05, 28.2, 29.2, 29.95, 33.25, 34.95, 30.1),
  Hemo = c(32.45, 32.85, 32.4, 34.15, 34.7, 32.9, 33.2, 35.15, 34.55, 36.05, 33.15, 32.15, 34.3, 35.5, 32.35, 34.15, 34.15, 33.15, 31.2, 35.1, 36.55, 30.8, 31.4, 33.2, 31.8, 27.85, 28.6, 32.3, 35.45, 32.4),
  BIOSA_SD = c(6.53, 7.78, 12.75, 7.31, 10.2, 10.85, 5.7, 8.92, 6.82, 6.94, 11.3, 6.62, 6.95, 7.17, 6.5, 11.43, 10.72, 8.1, 8.05, 9.12, 6.76, 6.09, 6.86, 7.07, 5.53, 8.16, 6.25, 7.46, 8.87, 10.71),
  Hemo_SD = c(8.21, 8.28, 7.15, 8.89, 6.59, 6.82, 7.4, 10.97, 6.7, 10.92, 6.91, 6.99, 7.04, 10.75, 7.22, 6.79, 8.88, 10.25, 7.26, 10.82, 10.3, 7.75, 10.36, 7.38, 7.12, 5.79, 7.62, 6.83, 10.07, 8.58)
)

# Melt the data frame for ggplot2
data_melted <- melt(data, id.vars = "Test", measure.vars = c("Biosa", "Hemo"), variable.name = "Type", value.name = "Value")

# Add the standard deviations to the melted data frame
data_melted$SD <- ifelse(data_melted$Type == "Biosa", data$BIOSA_SD, data$Hemo_SD)

# Plot the data
p <- ggplot(data_melted, aes(x = Test, y = Value, fill = Type)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = Value - SD, ymax = Value + SD), position = position_dodge(0.9), width = 0.25) +
  labs(x = "Tests (Day)", y = "Number of oPMNs Per Image") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman", margin = margin(r = 10)),
    legend.position = "top", # Position the legend at the top
    legend.direction = "horizontal", # Arrange legend items horizontally
    legend.title = element_blank(),
 panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank(), # Remove minor grid lines
    panel.border = element_rect(color = "black", fill = NA)
  ) +
  scale_fill_manual(values = c("Biosa" = "red", "Hemo" = "blue"), labels = c("DePerio", "Control")) +
  scale_x_discrete(breaks = c("T1", "T5", "T10", "T15","T20","T25","T30"), labels = c("1", "5", "10", "15","20","25","30")) # Custom breaks and labels

p

ggsave("compair.png", plot = p, width = 10, height = 6, dpi = 300)

3.9 Figure 3.3.

rm(list=ls())
# Load libraries
library(ggplot2)
library(dplyr)
library(openxlsx)
library(RColorBrewer)
library(scales)
library(car)
library(ggpubr)
library(extrafont)
library(grid)
library(gridExtra)
library(gtable)

# Load fonts
loadfonts(device = "win")

# Read the data
without_cornstrach_yas <- read.xlsx("without_cornstrach_yas.xlsx", colNames = TRUE)
with_cornstarch_yas <- read.xlsx("with_cornstarch_yas.xlsx", colNames = TRUE)
CaCL2 <- read.xlsx("CaCL2 adjust.xlsx", colNames = TRUE)

# Prepare the data for plotting
# For Without Cornstarch
before_values_without <- as.numeric(without_cornstrach_yas[1, -1]) * 2.18 * 10000
after_values_without <- as.numeric(without_cornstrach_yas[2, -1]) * 2.18 * 10000
sd_before_without <- mean(as.numeric(without_cornstrach_yas[3, -1]) * 2.18 * 10000)
sd_after_without <- mean(as.numeric(without_cornstrach_yas[4, -1]) * 2.18 * 10000)

# For With Cornstarch
before_values_with <- as.numeric(with_cornstarch_yas[1, -1]) * 2.18 * 10000
after_values_with <- as.numeric(with_cornstarch_yas[2, -1]) * 2.18 * 10000
sd_before_with <- mean(as.numeric(with_cornstarch_yas[3, -1]) * 2.18 * 10000)
sd_after_with <- mean(as.numeric(with_cornstarch_yas[4, -1]) * 2.18 * 10000)

# For CaCL2
before_values_CaCL2 <- as.numeric(CaCL2[1, -1]) * 2.18 * 10000
after_values_CaCL2 <- as.numeric(CaCL2[2, -1]) * 2.18 * 10000
sd_before_CaCL2 <- mean(as.numeric(CaCL2[3, -1])) * 2.18 * 10000
sd_after_CaCL2 <- mean(as.numeric(CaCL2[4, -1])) * 2.18 * 10000

# Create the plot data frame
plot_data <- data.frame(
  Group = rep(c("A", "B", "C", "D"), each = length(before_values_without)),
  Values = c(before_values_without, after_values_without, before_values_with, after_values_with)
)

# Set the factor levels and their descriptions
plot_data$Group <- factor(plot_data$Group,
                          levels = c("A", "B", "C", "D"),
                          labels = c("A: Bare glass(BW)",
                                     "B: Bare glass(AW)",
                                     "C: Cornstarch(BW)",
                                     "D:Cornstarch(AW)"
                                     ))


# Define jitter colors
jitter_colors <- c("A: Bare glass(BW)" = "navyblue", "B: Bare glass(AW)" = "navyblue", "C: Cornstarch(BW)" = "#B8860B", "D:Cornstarch(AW)" = "#B8860B")

# Create the boxplot with jitter, min, max, and mean with different shapes
p1 <- ggplot(plot_data, aes(x = Group, y = Values)) +
  geom_boxplot(outlier.shape = NA, fill = NA, color = "black") +
  geom_jitter(aes(color = Group), # Color jitter points by Category
              position = position_jitter(0.2), alpha = 2, size = 3, show.legend = FALSE) +
   # Boxplot without outlier points
  # Add min, max, and mean in black color with different shapes and ensure they are included in the legend
  stat_summary(fun = min, aes(shape = "Min", color = "Min"), geom = "point", shape = 15, size = 2) +
  stat_summary(fun = max, aes(shape = "Max", color = "Max"), geom = "point", shape = 17, size = 2) +
  stat_summary(fun = mean, aes(shape = "Mean", color = "Mean"), geom = "point", shape = 16, size = 2) +
  scale_color_manual(values = c(jitter_colors, "Mean" = "black", "Max" = "black", "Min" = "black"),
                     breaks = c("Mean", "Max", "Min")) +  # Only show Mean, Max, Min in the legend
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10, family = "Times New Roman"),
    axis.title = element_text(size = 10, family = "Times New Roman",face = "bold"),
    legend.text = element_text(size = 9, family = "Times New Roman"),
    legend.position = c(0.05, 0.1),
    legend.direction = "vertical", # Arrange legend items vertically
    legend.spacing.y = unit(0.02, 'cm'), # Further reduce space between legend lines
    legend.key.size = unit(0.8, 'lines'), # Reduce the size of the legend key
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(),  # Set major grid lines to gray
    panel.grid.minor = element_blank(), # Set minor grid lines to light gray
    panel.border = element_rect(color = "black", fill = NA)
    #aspect.ratio = 1.3/1  # Adjust the aspect ratio to squeeze the plot
  ) +
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) +
  labs(
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")"))),  # Adding bold to the y-axis label
    x = expression(bold("Condition"))  # Adding bold to the x-axis label
  )  +
  scale_x_discrete(labels = c("A", "B", "C", "D"))  # Use A, B, C, ... for x-axis ticks


# Display the plot
#print(p1)

##############3
# Read data
opti_time <- read.xlsx("optimization time.xlsx", colNames = TRUE)

# Adjust the data by multiplying all values by *2.18*10000
opti_time[, -1] <- opti_time[, -1] * (2.18 * 10000)

# Extract the values for oPMNs, Epithelial, and Debri excluding "Standard.Filter"
values_opmns <- as.numeric(opti_time[1, -c(1,2)])
values_epithelial <- as.numeric(opti_time[2, -c(1,2)])
values_debri <- as.numeric(opti_time[3, -c(1,2)])

# Extract the standard deviations for oPMNs, Epithelial, and Debri excluding "Standard.Filter"
sd_opmns <- as.numeric(opti_time[4, -c(1,2)])
sd_epithelial <- as.numeric(opti_time[5, -c(1,2)])
sd_debri <- as.numeric(opti_time[6, -c(1,2)])

# Create data frames for plotting with means
data_time <- data.frame(
  Time = rep(colnames(opti_time)[-c(1,2)], 3),
  Value = c(values_opmns, values_epithelial, values_debri),
  SD = c(sd_opmns, sd_epithelial, sd_debri),
  Variable = factor(rep(c("oPMNs", "Epithelial", "Debri"), each = length(values_opmns)), levels = c("oPMNs", "Epithelial", "Debri"))
)

# Extract the value of "Standard.Filter" for each variable
standard_filter_values <- as.numeric(opti_time[1:3, 2])

p2 <- ggplot(data_time, aes(x = factor(sub("min", "", Time), levels = sub("min", "", colnames(opti_time)[-c(1,2)])), y = Value, group = Variable, color = Variable, linetype = Variable)) +
  geom_point(size = 3) +
  geom_line(size = 1) +
  # Add the control lines with explicit color and linetype mappings in aes()
  geom_hline(aes(yintercept = standard_filter_values[1], color = "oPMNs (Cont)", linetype = "oPMNs (Cont)"), size = 0.7) +
  geom_hline(aes(yintercept = standard_filter_values[2], color = "Epithelial (Cont)", linetype = "Epithelial (Cont)"), size = 0.7) +
  geom_hline(aes(yintercept = standard_filter_values[3], color = "Debris (Cont)", linetype = "Debris (Cont)"), size = 0.7) +
  scale_color_manual(
    values = c("oPMNs" = "navyblue", "Epithelial" = "#B8860B", "Debri" = "brown",
               "oPMNs (Cont)" = "navyblue", "Epithelial (Cont)" = "#B8860B", "Debris (Cont)" = "brown"),
    labels = c("oPMNs" = "oPMNs", "Epithelial" = "Epithelial", "Debri" = "Debris",
               "oPMNs (Cont)" = "oPMNs (Cont)", "Epithelial (Cont)" = "Epithelial (Cont)", "Debris (Cont)" = "Debris (Cont)") # Legend labels for both variables and control
  ) +
  scale_linetype_manual(
    values = c("oPMNs" = "solid", "Epithelial" = "solid", "Debri" = "solid",
               "oPMNs (Cont)" = "dashed", "Epithelial (Cont)" = "dashed", "Debris (Cont)" = "dashed")
  ) + # Map linetypes for solid and dashed lines
  labs(x = "Time (min)") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 11, family = "Times New Roman"),
    axis.title = element_text(size = 11, family = "Times New Roman"),
    legend.text = element_text(size = 10, family = "Times New Roman"),
    legend.position = c(0.4, 0.4), # Position the legend inside the plot to the right
    legend.direction = "vertical", # Arrange legend items vertically
    legend.spacing.y = unit(0.02, 'cm'), # Further reduce space between legend lines
    legend.key.size = unit(0.8, 'lines'), # Reduce the size of the legend key
    legend.title = element_text(hjust = 0.5, size = 25, family = "Times New Roman"), # Center-align the legend title
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank(), # Remove minor grid lines
    panel.border = element_rect(color = "black", fill = NA)
    #aspect.ratio = 1.3/1  # Adjust the aspect ratio to squeeze the plot
  )  +
  geom_vline(xintercept = which(colnames(opti_time)[-c(1,2)] == "15min"), linetype = "dashed", color = "gray") +
  scale_x_discrete(labels = function(x) sub("min", "", x)) + # Reduce space on x-axis
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) + # Adjust y-axis labels
  labs(
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")")))
  ) +
  guides(
    color = guide_legend(override.aes = list(linetype = c("solid", "solid", "solid", "dashed", "dashed", "dashed")), title = NULL),
    linetype = "none"  # Remove separate linetype legend
  )

#p2

########################
# Read the data
opti_cornstarch <- read.xlsx("optimization cornstarch.xlsx", colNames = TRUE)

# Extract the standard filter values and multiply by 2.18 * 10000
standard_filter_values <- as.numeric(opti_cornstarch[1:6, 2]) * (2.18 * 10000)

# Remove the "Standard.Filter" column and convert Cornstarch concentration columns
opti_cornstarch <- opti_cornstarch[, -2]
opti_cornstarch[, -1] <- opti_cornstarch[, -1] * (2.18 * 10000)

# Extract the values for oPMNs, Epithelial, and Debri
values_opmns <- as.numeric(opti_cornstarch[1, -1])
values_epithelial <- as.numeric(opti_cornstarch[2, -1])
values_debri <- as.numeric(opti_cornstarch[3, -1])

# Extract the standard deviations for oPMNs, Epithelial, and Debri
sd_opmns <- as.numeric(opti_cornstarch[4, -1]) # Row for O-SD
sd_epithelial <- as.numeric(opti_cornstarch[5, -1]) # Row for E-SD
sd_debri <- as.numeric(opti_cornstarch[6, -1]) # Row for D-SD

# Create a data frame for plotting
plot_data <- data.frame(
  Corn_Starch_Concentration = rep(colnames(opti_cornstarch)[-1], times = 3),
  Value = c(values_opmns, values_epithelial, values_debri),
  SD = c(sd_opmns, sd_epithelial, sd_debri),
  Variable = rep(c("oPMNs", "Epithelial", "Debri"), each = length(values_opmns))
)

# Adjust the factor levels for Variable to show oPMNs first
plot_data$Variable <- factor(plot_data$Variable, levels = c("oPMNs", "Epithelial", "Debri"))

# Convert Corn_Starch_Concentration to numeric (remove "mg(CS)/ml")
plot_data$Corn_Starch_Concentration <- gsub("mg\\(CS\\)/ml", "", plot_data$Corn_Starch_Concentration)

# Make sure Corn_Starch_Concentration is numeric
plot_data$Corn_Starch_Concentration <- as.numeric(as.character(plot_data$Corn_Starch_Concentration))

# Create the line plot with adjustments
p3 <- ggplot(plot_data, aes(x = Corn_Starch_Concentration, y = Value, group = Variable, color = Variable, linetype = Variable)) +
  geom_point(size = 3) +
  geom_line(size = 1) +

  # Add horizontal lines for the control values
  geom_hline(aes(yintercept = standard_filter_values[1], color = "oPMNs (Cont)", linetype = "oPMNs (Cont)"), size = 0.7) +
  geom_hline(aes(yintercept = standard_filter_values[2], color = "Epithelial (Cont)", linetype = "Epithelial (Cont)"), size = 0.7) +
  geom_hline(aes(yintercept = standard_filter_values[3], color = "Debris (Cont)", linetype = "Debris (Cont)"), size = 0.7) +

  # Use numeric values for the x-axis scale
  scale_x_continuous(breaks = seq(0.2, 1, by = 0.2)) +  # Ensure the correct x-axis range and ticks

  # Correct the geom_vline for 0.6 mg(CS)/ml
  geom_vline(xintercept = 0.6, linetype = "dashed", color = "gray") +  # Directly use 0.6 as the xintercept

  scale_color_manual(
    values = c("oPMNs" = "navyblue", "Epithelial" = "#B8860B", "Debri" = "brown",
               "oPMNs (Cont)" = "navyblue", "Epithelial (Cont)" = "#B8860B", "Debris (Cont)" = "brown"),
    labels = c("oPMNs" = "oPMNs", "Epithelial" = "Epithelial", "Debri" = "Debris",
               "oPMNs (Cont)" = "oPMNs (Cont)", "Epithelial (Cont)" = "Epithelial (Cont)", "Debris (Cont)" = "Debris (Cont)")
  ) +

  scale_linetype_manual(
    values = c("oPMNs" = "solid", "Epithelial" = "solid", "Debri" = "solid",
               "oPMNs (Cont)" = "dashed", "Epithelial (Cont)" = "dashed", "Debris (Cont)" = "dashed")
  ) +

  labs(x = "Concentration (mg/ml)") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman"),
    legend.text = element_text(size = 10, family = "Times New Roman"),
    legend.position = c(0.4, 0.4),
    legend.direction = "vertical",
    legend.spacing.y = unit(0.02, 'cm'),
    legend.key.size = unit(0.8, 'lines'),
    legend.title = element_text(hjust = 0.5, size = 25, family = "Times New Roman"),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA)
  #  aspect.ratio = 1.3/1
  ) +

  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) +

  labs(
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")")))
  ) +

  guides(
    color = guide_legend(override.aes = list(linetype = c("solid", "solid", "solid", "dashed", "dashed", "dashed")), title = NULL),
    linetype = "none"
  )

#p3

####################################
library(reshape2)
library(scales)
library(grid)
library(gridExtra)

# Read data
PBS <- read.xlsx("PBS_Wash_Data.xlsx", colNames = TRUE)

# Convert the first row to numeric and adjust by multiplying by 2.18 * 10000 (21800)
PBS[1, 2:9] <- as.numeric(PBS[1, 2:9]) * 21800
PBS[2, 2:9] <- as.numeric(PBS[2, 2:9]) * 21800

# Prepare data for ggplot2
PBS_main <- PBS[1, ]
PBS_main <- PBS_main %>%
  rename_with(~c("PBS.concentration", "aPBS_Before", "aPBS_After", "PBS_33W_Before", "PBS_33W_After", "PBS_66W_Before", "PBS_66W_After", "Water_Before", "Water_After"), everything())

PBS_long <- melt(PBS_main, id.vars = "PBS.concentration", variable.name = "Condition", value.name = "Concentration")

# Prepare standard deviation data
PBS_sd <- PBS[2, ]
PBS_sd <- PBS_sd %>%
  rename_with(~c("PBS.concentration", "aPBS_Before", "aPBS_After", "PBS_33W_Before", "PBS_33W_After", "PBS_66W_Before", "PBS_66W_After", "Water_Before", "Water_After"), everything())

PBS_sd_long <- melt(PBS_sd, id.vars = "PBS.concentration", variable.name = "Condition", value.name = "St_deviation")

# Adjust factor levels
PBS_long$Condition <- factor(PBS_long$Condition, levels = c("aPBS_Before", "aPBS_After", "PBS_33W_Before", "PBS_33W_After", "PBS_66W_Before", "PBS_66W_After", "Water_Before", "Water_After"))
PBS_sd_long$Condition <- factor(PBS_sd_long$Condition, levels = c("aPBS_Before", "aPBS_After", "PBS_33W_Before", "PBS_33W_After", "PBS_66W_Before", "PBS_66W_After", "Water_Before", "Water_After"))

# Combine the data frames
PBS_combined <- merge(PBS_long, PBS_sd_long, by = "Condition")

# Define new group labels and reverse the order
PBS_combined$Group <- factor(c('100', '100', '66', '66', '33', '33', '0', '0'), levels = c('0', '33', '66', '100'))

# Define a new variable for Before and After
PBS_combined$Wash <- factor(ifelse(grepl("Before", PBS_combined$Condition), "Before", "After"), levels = c("Before", "After"))

# Read data
PBS <- read.xlsx("PBS_Wash_Data.xlsx", colNames = TRUE)

# Convert the first row to numeric and adjust by multiplying by 2.18 * 10000 (21800)
PBS[1, 2:9] <- as.numeric(PBS[1, 2:9]) * 21800
PBS[2, 2:9] <- as.numeric(PBS[2, 2:9]) * 21800

# Prepare data for ggplot2
PBS_main <- PBS[1, ]
PBS_main <- PBS_main %>%
  rename_with(~c("PBS.concentration", "aPBS_Before", "aPBS_After", "PBS_33W_Before", "PBS_33W_After", "PBS_66W_Before", "PBS_66W_After", "Water_Before", "Water_After"), everything())

PBS_long <- melt(PBS_main, id.vars = "PBS.concentration", variable.name = "Condition", value.name = "Concentration")

# Prepare standard deviation data
PBS_sd <- PBS[2, ]
PBS_sd <- PBS_sd %>%
  rename_with(~c("PBS.concentration", "aPBS_Before", "aPBS_After", "PBS_33W_Before", "PBS_33W_After", "PBS_66W_Before", "PBS_66W_After", "Water_Before", "Water_After"), everything())

PBS_sd_long <- melt(PBS_sd, id.vars = "PBS.concentration", variable.name = "Condition", value.name = "St_deviation")

# Adjust factor levels
PBS_long$Condition <- factor(PBS_long$Condition, levels = c("aPBS_Before", "aPBS_After", "PBS_33W_Before", "PBS_33W_After", "PBS_66W_Before", "PBS_66W_After", "Water_Before", "Water_After"))
PBS_sd_long$Condition <- factor(PBS_sd_long$Condition, levels = c("aPBS_Before", "aPBS_After", "PBS_33W_Before", "PBS_33W_After", "PBS_66W_Before", "PBS_66W_After", "Water_Before", "Water_After"))

# Combine the data frames
PBS_combined <- merge(PBS_long, PBS_sd_long, by = c("Condition"))

# Define new group labels and reverse the order
PBS_combined$Group <- factor(c('100', '100', '66', '66', '33', '33', '0', '0'), levels = c('0', '33', '66', '100'))

# Define a new variable for Before and After
PBS_combined$Wash <- factor(ifelse(grepl("Before", PBS_combined$Condition), "Before", "After"), levels = c("Before", "After"))
PBS_combined$Group <- as.numeric(as.character(PBS_combined$Group))
PBS_combined <- PBS_combined[order(PBS_combined$Group), ]  # Sort by Group

# Line plot with curves for Before and After
p4 <- ggplot(PBS_combined, aes(x = Group, y = Concentration, color = Wash, group = Wash)) +
  geom_line(size = 1.2) +   # Add lines connecting points for curves
  geom_point(size = 3) +  # Add points for each condition
 # geom_errorbar(aes(ymin = Concentration - St_deviation, ymax = Concentration + St_deviation), width = 0.2) +  # Add error bars
  scale_color_manual(values = c("Before" = "navyblue", "After" = "#B8860B"),
                     labels = c("Before" = "BW", "After" = "AW")) +  # Custom color mapping and legend labels
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) + # Adjust y-axis labels
   labs(x = "PBS Concentration (%)",
        y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")"))),
       color = "Wash",
       shape = "Wash") +
  geom_vline(xintercept = 100, linetype = "dashed", color = "gray") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman"),
    legend.text = element_text(size = 11, family = "Times New Roman"),
    legend.position = c(0.3, 0.3),
    legend.direction = "vertical",
    legend.spacing.y = unit(0.02, 'cm'),
    legend.key.size = unit(0.8, 'lines'),
    legend.title =element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA),
    #aspect.ratio = 1.8/1
  )

# Display the plot
#print(p4)
#######################
PBS<- read.xlsx("PBS adjusted.xlsx", colNames = TRUE)
colnames(PBS) <- c("PBS.concentration", "5", "10", "15", "20", "25")
# Adjust the data by multiplying by 2.18 * 10000 (21800)
PBS[1:2, 2:6] <- PBS[1:2, 2:6] * (2.18 * 10000)
PBS[3:4, 2:6] <- PBS[3:4, 2:6] * (2.18 * 10000)
# Reshape the main data for ggplot2
PBS_main <- PBS[1:2, ]
PBS_long <- melt(PBS_main, id.vars = "PBS.concentration",
                 variable.name = "Volume",
                 value.name = "Concentration")

# Create the standard deviation data frame
PBS_sd <- PBS[3:4, ]
colnames(PBS_sd) <- c("PBS.concentration", "5", "10", "15", "20", "25")
PBS_sd$PBS.concentration <- c("Befor wash", "After wash")
PBS_sd_long <- melt(PBS_sd, id.vars = "PBS.concentration",
                    variable.name = "Volume",
                    value.name = "St_deviation")

# Adjust the order of the factor levels for PBS.concentration
PBS_long$PBS.concentration <- factor(PBS_long$PBS.concentration, levels = c("Befor wash", "After wash"))
PBS_sd_long$PBS.concentration <- factor(PBS_sd_long$PBS.concentration, levels = c("Befor wash", "After wash"))

# Merge the long data frames
PBS_combined <- merge(PBS_long, PBS_sd_long, by = c("PBS.concentration", "Volume"))

PBS_combined$Volume <- as.numeric(as.character(PBS_combined$Volume))
PBS_combined <- PBS_combined[order(PBS_combined$Volume), ]  # Sort by Volume

p5 <- ggplot(PBS_combined, aes(x = Volume, y = Concentration, color = PBS.concentration, group = PBS.concentration)) +
  geom_line(size = 1.2) +  # Add lines connecting points for Before and After wash
  geom_point(size = 3) +  # Add points for each condition
  # Add a dashed vertical line at x = 10 mL
  geom_vline(xintercept = 10, linetype = "dashed", color = "gray") +
  scale_color_manual(values = c("Befor wash" = "navyblue", "After wash" = "#B8860B"),
                     labels = c("Befor wash" = "BW", "After wash" = "AW")) +  # Custom color mapping and legend labels
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) +  # Adjust y-axis labels
  scale_x_continuous(breaks = c(5, 10, 15, 20, 25), limits = c(5, 25)) +  # Ensure the x-axis includes 10
  labs(x = "PBS Volume (mL)",
       y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")"))),
       color = "Condition") +  # Legend title for color
  theme_minimal() +
  theme(
    axis.text = element_text(size = 11, family = "Times New Roman"),
    axis.title = element_text(size = 11, family = "Times New Roman"),
    legend.text = element_text(size = 10, family = "Times New Roman"),
    legend.position = c(0.24, 0.3),
    legend.direction = "vertical",
    legend.spacing.y = unit(0.02, 'cm'),
    legend.key.size = unit(0.8, 'lines'),
    legend.title = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA),
   #aspect.ratio = 1.8/1
  )

# Display the plot
#print(p5)

######################
library(dplyr)
library(ggplot2)
library(openxlsx)
library(tidyr)
library(scales)
library(extrafont)
Biosa <- read.xlsx("biosa compair.xlsx", colNames = TRUE)

# Extract means and standard deviations for Biosa and Hemo only
observed <- Biosa[2:3, -1] * (2.18 * 10000)
sds <- Biosa[5:6, -1] * (2.18 * 10000)
overall_means <- colMeans(observed)

# Calculate residuals (subtracting overall mean from each observed value)
residuals <- t(apply(observed, 1, function(row) row - overall_means))

# Convert data to long format for ggplot
residuals_long <- as.data.frame(residuals) %>%
  mutate(Method = Biosa$Quantitative.test[2:3]) %>%
  gather(key = "Time", value = "Residual", -Method)

# Relevel Method factor to ensure correct ordering
residuals_long$Method <- factor(residuals_long$Method, levels = c("BioSA", "Hemo"), labels = c("DePerio", "Control"))

P6 <- ggplot(residuals_long, aes(x = Method, y = Residual)) +
  geom_boxplot(fill = "white", color = "black") +
  geom_jitter(aes(color = Method),
               position = position_jitter(0.2), alpha = 2, size = 2) +
    # Keep boxplot in white with black border
  stat_summary(fun = mean, geom = "point", shape = 15, size = 3, aes(color = "Mean")) +  # Add mean marker
  stat_summary(fun = max, geom = "point", shape = 17, size = 3, aes(color = "Max")) +  # Add max marker
  stat_summary(fun = min, geom = "point", shape = 16, size = 3, aes(color = "Min")) +  # Add min marker
  scale_color_manual(values = c("DePerio" = "navyblue", "Control" = "#B8860B", "Mean" = "black", "Max" = "black", "Min" = "black"),
                     breaks = c("Mean", "Max", "Min"),
                     labels = c("Mean", "Max", "Min")) +
  theme_minimal() +
  labs(x = "Condition",
       y = expression(bold(paste("Residuals (" , " x 10"^5, ")")))
       ) +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = c(0.25, 0.93),
    legend.direction = "vertical", # Arrange legend items vertically
    legend.spacing.y = unit(0.02, 'cm'), # Further reduce space between legend lines
    legend.key.size = unit(0.8, 'lines'), # Reduce the size of the legend key
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Set major grid lines to gray
    panel.grid.minor =  element_blank(), # Set minor grid lines to light gray
    panel.border = element_rect(color = "black", fill = NA)
    #aspect.ratio = 1.2/1
  ) +
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5))


#P6
##############

################################
# Load necessary libraries
library(readxl)
library(lme4)
library(extrafont)
#font_import()
loadfonts(device = "win")
##fonts()
# Load the data
file_path <- "biosa compair.xlsx"
df <- read_excel(file_path)

# Extract BioSA and Hemo values and adjust them
adjustment_factor <- 2.18 * 10000
biosa <- as.numeric(df[2, 2:31]) * adjustment_factor
hemo <- as.numeric(df[3, 2:31]) * adjustment_factor

# Create a long-format dataframe for the mixed-effects model
time_points <- 1:30  # Assuming T1 to T30 are the time points
long_df <- data.frame(
  Value = c(biosa, hemo),
  Test = factor(rep(c("Biosa", "Hemo"), each = length(biosa))),
  Time = rep(time_points, times = 2),
  Subject = factor(rep(1, length(biosa) * 2))  # Assuming one subject, adjust if there are more
)

model <- lm(Value ~ Test * Time, data = long_df)
summary(model)
## 
## Call:
## lm(formula = Value ~ Test * Time, data = long_df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -98705 -26590  -4869  29617  83129 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   681099.7    14891.8  45.737  < 2e-16 ***
## TestHemo       65432.6    21060.2   3.107  0.00297 ** 
## Time             241.8      838.8   0.288  0.77425    
## TestHemo:Time  -1807.0     1186.3  -1.523  0.13332    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39770 on 56 degrees of freedom
## Multiple R-squared:  0.2313, Adjusted R-squared:  0.1901 
## F-statistic: 5.616 on 3 and 56 DF,  p-value: 0.001942
# Create a dataframe for plotting
plot_df <- data.frame(
  Time = rep(time_points, times = 2),
  Value = c(biosa, hemo),
  Test = rep(c("Biosa", "Hemo"), each = 30)
)
plot_df$Test <- factor(plot_df$Test, levels = c("Biosa", "Hemo"), labels = c("DePerio", "Control"))
# Plot the time series
p7 <- ggplot(plot_df, aes(x = Time, y = Value, color = Test, group = Test)) +
  geom_line() +
  geom_point() +
  labs(
    x = "Tests (Day)",
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")")))
  ) +
  theme_minimal() +
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) + # Adjust y-axis labels
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman"),
    legend.text = element_text(size = 12, family = "Times New Roman", margin = margin(r = 10)),
    legend.position = c(0.25, 0.96),
    legend.direction = "vertical", # Arrange legend items vertically
    legend.spacing.y = unit(0.02, 'cm'), # Further reduce space between legend lines
    legend.key.size = unit(0.8, 'lines'), # Reduce the size of the legend key
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Set major grid lines to gray
    panel.grid.minor = element_blank(), # Remove minor grid lines
    panel.border = element_rect(color = "black", fill = NA)
    #aspect.ratio = 1.2/1
  ) +
  scale_color_manual(values = c("DePerio" = "navyblue", "Control" = "#B8860B")) +
  guides(fill = FALSE)

#p7
###########################
########################## Adjust the layout using layout_matrix
library(gridExtra)
library(grid)
library(gtable)
library(cowplot)

# Ensure font consistency in all plots
font_size <- 12
font_family <- "Times New Roman"

# Apply consistent theme with bold axis titles to all plots
p1 <- p1 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 20)),
                 plot.margin = margin(t = 8, r = 20, b = 30, l = 8)
                 )

p2 <- p2 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 legend.position = c(0.1, 0.4),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 20)),
                 plot.margin = margin(t = 8, r = 20, b = 30, l = 20))

p3 <- p3 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 20)),
                 plot.margin = margin(t = 8, r = 20, b = 30, l = 20),
                 legend.position = c(0.1, 0.4))

p4 <- p4 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 legend.position = c(0.1, 0.3),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 20)),
                 plot.margin = margin(t = 8, r = 15, b = 8, l = 15))  # Prevent overlap with axis title

p5 <- p5 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 20)),
                 plot.margin = margin(t = 8, r = 15, b = 8, l = 15),
                 legend.position = c(0.1, 0.3))  # Prevent overlap with axis title

P6 <- P6 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 20)),
                 plot.margin = margin(t = 8, r = 15, b = 8, l = 15),
                 legend.position = c(0.1, 0.93))

p7 <- p7 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 20)),
                 plot.margin = margin(t = 8, r = 15, b = 8, l = 15),
                 legend.position = c(0.1, 0.95))



c1 = plot_grid(p1, p2,p3, ncol = 3, rel_widths = c(1, 1,1), align = "v", axis = "tb")
c2=plot_grid(p4 , p5, P6 ,p7, ncol = 4, rel_widths = c(1, 1, 1,1), align = "v", axis = "tb")

# Arrange the plots in 2 rows and 3 columns, adding the combined p4_p5 plot
q <- plot_grid(
  c1,
  c2,
  nrow = 2, align = "v", axis = "tb"
)

# Save the final arranged plot to a file
ggsave("arrange.png", plot = q, width = 11, height = 8)
knitr::include_graphics("arrange.png")

4. Clinical results

4.1 Figure S4.1.

rm(list=ls())
library(ggplot2)
library(reshape2)
library(extrafont)
loadfonts(device = "win")

# Create the data frame
data <- data.frame(
  Condition = 1:21, # Treat each condition as a unique identifier
  BIOSA = c(4.1,4.4,4.5, 5.5, 5.4, 6, 8.8, 8.1, 9, 9.1, 14.2, 17.5, 18.5, 21.1, 27, 34.7, 36.8, 43.3, 52.1, 68.7, 89.1),
  Hemo = c(4.7,5.1,5.8, 5.5, 6, 6.5, 9, 9.2, 9.5, 9.5, 15, 20, 20, 30, 31, 35, 39, 42, 57, 75, 90),
  B_STDEV = c(5.872818744,5.381449619,6.152235366, 6.64529909, 2.872281323, 8.660254038, 4.369, 4.124318125, 2.968164416, 7.777531742, 9.531002046, 7.386474125, 6.487680633, 8.366600265, 5.518151865, 6.57951366, 6.770524352, 8.525843067, 5.196152423, 7.133722731, 3.269556545)
)

# Melt the data frame for ggplot2
data_melted <- melt(data, id.vars = "Condition", measure.vars = c("Hemo", "BIOSA"), variable.name = "Type", value.name = "Value")

# Plot the data
p <- ggplot(data, aes(x = Condition)) +
  geom_line(aes(y = BIOSA, color = "BIOSA"), size = 1) +
  geom_point(aes(y = BIOSA, color = "BIOSA"), size = 2) +
  geom_errorbar(aes(ymin = BIOSA - B_STDEV, ymax = BIOSA + B_STDEV, color = "BIOSA"), width = 0.2) +
  geom_line(aes(y = Hemo, color = "Hemo"), size = 1) +
  geom_point(aes(y = Hemo, color = "Hemo"), size = 2) +
  scale_color_manual(values = c("BIOSA" = "red", "Hemo" = "blue"), labels = c("DePerio", "Control")) +
  labs(x = "Tests", y = "Number of OPMNs Per Image", color = "") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.title = element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA)
  ) +
  scale_x_continuous(breaks = c(1, 3, 6, 9, 12, 15, 18, 21)) # Custom breaks and labels

p

4.2 Figure S4.2.

########## ROC CURVE ###########
rm(list=ls())

# Load necessary libraries
if (!requireNamespace("patchwork", quietly = TRUE)) {
  install.packages("patchwork")
}
if (!requireNamespace("PRROC", quietly = TRUE)) {
  install.packages("PRROC")
}
library(openxlsx)
library(tidyr)
library(dplyr)
library(pROC)
library(ggplot2)

# Load data
data <- read.xlsx("Categorize_data2.xlsx", colNames = TRUE)

# Reshape from wide to long format
df_long <- pivot_longer(data[c(-2,-3),], cols = -1, names_to = "Group", values_to = "Value")

# Ensure Value is numeric
df_long$Value <- as.numeric(df_long$Value)

# Define function to calculate ROC curve for each group and return a data frame for plotting
calculate_roc <- function(group_name) {
  df_long <- df_long %>%
    mutate(BinaryOutcome = ifelse(grepl(group_name, Group), 1, 0))

  # Check if BinaryOutcome has two levels
  if(length(levels(as.factor(df_long$BinaryOutcome))) != 2) {
    stop(paste("BinaryOutcome does not have exactly two levels for group", group_name))
  }

  # Calculate ROC curve
  roc_curve <- roc(df_long$BinaryOutcome, df_long$Value)

  # Create a data frame with ROC curve data for ggplot
  roc_df <- data.frame(
    Specificity = roc_curve$specificities,
    Sensitivity = roc_curve$sensitivities,
    Group = group_name
  )

  return(roc_df)
}

# Groups to calculate ROC curves for
groups <- c("Healthy", "Gingivitis", "Mild.Periodontitis", "Moderate.Periodontitis", "Severe.Periodontitis")

# Calculate ROC curve for each group and combine into a single data frame
roc_data <- do.call(rbind, lapply(groups, calculate_roc))

# Convert Group to a factor with the desired order
roc_data$Group <- factor(roc_data$Group, levels = groups)

# Mapping of groups to numbers
group_labels <- c("Healthy" = "group1", "Gingivitis" = "group2", "Mild.Periodontitis" = "group3", "Moderate.Periodontitis" = "group4", "Severe.Periodontitis" = "group5")

# Plot ROC curves using ggplot2
roc_plot <- ggplot(roc_data, aes(x = 1 - Specificity, y = Sensitivity, color = Group)) +
  geom_line(size = 1) +
  geom_abline(linetype = "dashed") +
  labs(x = "1 - Specificity", y = "Sensitivity") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    strip.text = element_text(size = 12, family = "Times New Roman"),
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank(), # Remove minor grid lines
    panel.border = element_rect(color = "black", fill = NA),
    legend.position = 'top',
    legend.direction = 'horizontal'
  ) +
  scale_color_manual(values = c("Healthy" = "yellow", "Gingivitis" = "red", "Mild.Periodontitis" = "green", "Moderate.Periodontitis" = "blue", "Severe.Periodontitis" = "purple"),
                     labels = group_labels) +
  guides(color = guide_legend(nrow = 1))

# Print the ROC plot
print(roc_plot)

4.3 Figure S4.3

########## PR CURVE ###########
rm(list=ls())

# Load necessary libraries
if (!requireNamespace("patchwork", quietly = TRUE)) {
  install.packages("patchwork")
}
if (!requireNamespace("PRROC", quietly = TRUE)) {
  install.packages("PRROC")
}
library(openxlsx)
library(tidyr)
library(dplyr)
library(PRROC)
library(ggplot2)

# Load data
data <- read.xlsx("Categorize_data2.xlsx", colNames = TRUE)

# Reshape from wide to long format
df_long <- pivot_longer(data[c(-2,-3),], cols = -1, names_to = "Group", values_to = "Value")

# Ensure Value is numeric
df_long$Value <- as.numeric(df_long$Value)

# Define function to calculate PR curve for each group and return a data frame for plotting
calculate_pr <- function(group_name) {
  df_long <- df_long %>%
    mutate(BinaryOutcome = ifelse(grepl(group_name, Group), 1, 0))

  # Check if BinaryOutcome has two levels
  if(length(levels(as.factor(df_long$BinaryOutcome))) != 2) {
    stop(paste("BinaryOutcome does not have exactly two levels for group", group_name))
  }

  # Calculate PR curve
  pr_curve <- pr.curve(scores.class0 = df_long$Value[df_long$BinaryOutcome == 1],
                       scores.class1 = df_long$Value[df_long$BinaryOutcome == 0],
                       curve = TRUE)

  # Create a data frame with PR curve data for ggplot
  pr_df <- data.frame(
    Recall = pr_curve$curve[, 1],
    Precision = pr_curve$curve[, 2],
    Group = group_name
  )

  return(pr_df)
}

# Groups to calculate PR curves for
groups <- c("Healthy", "Gingivitis", "Mild.Periodontitis", "Moderate.Periodontitis", "Severe.Periodontitis")

# Calculate PR curve for each group and combine into a single data frame
pr_data <- do.call(rbind, lapply(groups, calculate_pr))

# Convert Group to a factor with the desired order
pr_data$Group <- factor(pr_data$Group, levels = groups)

# Mapping of groups to numbers
group_labels <- c("Healthy" = "group1", "Gingivitis" = "group2", "Mild.Periodontitis" = "group3", "Moderate.Periodontitis" = "group4", "Severe.Periodontitis" = "group5")

# Plot PR curves using ggplot2
pr_plot <- ggplot(pr_data, aes(x = Recall, y = Precision, color = Group)) +
  geom_line(size = 1) +
  labs(x = "Recall", y = "Precision") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    strip.text = element_text(size = 12, family = "Times New Roman"),
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank(), # Remove minor grid lines
    panel.border = element_rect(color = "black", fill = NA),
    legend.position = 'top',
    legend.direction = 'horizontal'
  ) +
  scale_color_manual(values = c("Healthy" = "yellow", "Gingivitis" = "red", "Mild.Periodontitis" = "green", "Moderate.Periodontitis" = "blue", "Severe.Periodontitis" = "purple"),
                     labels = group_labels) +
  guides(color = guide_legend(nrow = 1))

# Print the PR plot
print(pr_plot)

4.4 Figure S4.4.

library(dplyr)
library(ggplot2)
library(openxlsx)
library(ggpubr)
library(tidyr)
library(extrafont)
font_import()
## Importing fonts may take a few minutes, depending on the number of fonts and the speed of the system.
## Continue? [y/n]
loadfonts(device = "win")
#fonts()
# Read data from Excel
data <- read.xlsx("Adjusted data.xlsx", colNames = TRUE)

# Pivot the data to a long format
data_long <- pivot_longer(data,
                          cols = -Info,
                          names_to = "Category",
                          values_to = "Value")

# Remove leading and trailing whitespace from Info column
data_long$Info <- trimws(data_long$Info)

# Separate data into measurements and standard deviations
measurements <- data_long %>%
  filter(Info == "BIOSA Method") %>%
  select(-Info)

stdevs <- data_long %>%
  filter(Info == "B-STDEV") %>%
  select(-Info)

measurements2 <- data_long %>%
  filter(Info == "Hemo") %>%
  select(-Info)

# Merge the datasets
data_final <- left_join(measurements, stdevs, by = "Category", suffix = c(".mean", ".stdev"))
data_final <- left_join(data_final, measurements2, by = "Category")

# Create a simpler category label if necessary
data_final$Category <- gsub("_\\d+", "", data_final$Category)
data_final$Category <- gsub("\\.", " ", data_final$Category)

# Calculate the count of observations for each category
data_final <- data_final %>%
  group_by(Category) %>%
  mutate(n = n()) %>%
  ungroup()

# Perform Wilcoxon rank-sum tests between each pair of categories
category_pairs <- combn(unique(data_final$Category), 2, simplify = FALSE)
results <- lapply(category_pairs, function(pair) {
  cat1 <- pair[1]
  cat2 <- pair[2]
  test_result <- wilcox.test(Value.mean ~ Category, data = data_final,
                             subset = Category %in% c(cat1, cat2),
                             alternative = "two.sided")
  return(test_result)
})

#### Box plots ####
data_final$Category <- factor(data_final$Category, levels = c(
  "Healthy", "Gingivitis", "Mild periodontal disease", "Moderate PD", "Severe"
))

custom_colors <- c("Healthy" = "#006400", # Dark green
                   "Gingivitis" = "#66c2a4", # Lighter green
                   "Mild periodontal disease" = "#fc8d62", # Light orange
                   "Moderate PD" = "#e31a1c", # Dark orange
                   "Severe" = "#8B0000") # Dark red

x_labels <- c("N=6\n(1)", "N=4\n(2)", "N=4\n(3)", "N=4\n(4)", "N=3\n(5)")

# Calculate the mean of Value.stdev for each Category
data_final <- data_final %>%
  group_by(Category) %>%
  mutate(mean = mean(Value.mean, na.rm = TRUE)) %>%
  mutate(mean_stdev = mean(Value.stdev, na.rm = TRUE)) %>%
  ungroup()

# Adding error bars based on the mean standard deviation per category
p <- ggplot(data_final, aes(x = Category, y = Value.mean)) +
  geom_jitter(aes(color = "gray"),
              position = position_jitter(0.2), alpha = 1, size=3) +
  geom_boxplot(aes(fill = Category)) +
  # Adding error bars based on the mean standard deviation for each category
  geom_errorbar(aes(ymin = mean - mean_stdev, ymax = mean + mean_stdev), 
                width = 0.2) +
  scale_fill_manual(values = custom_colors) +
  stat_summary(aes(color = "Mean"), fun = mean, geom = "point", shape = 15, size = 2) +  # Add mean marker
  stat_summary(aes(color = "Max"), fun = max, geom = "point", shape = 17, size = 2) +  # Add max marker
  stat_summary(aes(color = "Min"), fun = min, geom = "point", shape = 16, size = 2) +  # Add min marker
  scale_color_manual(name = "Statistics labels", values = c("Mean" = "black", "Max" = "black", "Min" = "black")) +
  theme_minimal()

categories <- c("Healthy", "Gingivitis", "Mild periodontal disease", "Moderate PD", "Severe")
comparisons <- combn(categories, 2, simplify = FALSE)

p <- p + stat_compare_means(comparisons = comparisons, 
                            method = "wilcox.test", method.args = list(exact = TRUE), size = 3.2, family = "Times New Roman")

final_plot <- p + labs(
  x = "Groups"
) +
  theme_minimal() +
  theme(
    axis.text= element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman", face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = "top",  # Position legend in bottom right inside plot
    legend.direction = "horizontal",  # Arrange legend items vertically
    legend.spacing.y = unit(0.02, 'cm'),  # Further reduce space between legend lines
    legend.key.size = unit(0.8, 'lines'),  # Reduce the size of the legend key
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Set major grid lines to gray
    panel.grid.minor = element_blank(), # Set minor grid lines to light gray
    panel.border = element_rect(color = "black", fill = NA)
  )  +
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5) # Adjust y-axis labels
  ) +
  scale_x_discrete(labels = x_labels) +
  labs(
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")")))
  ) +
  guides(fill = FALSE)  # Remove legend for fill

print(final_plot)

4.5 Figure S4.5.

library(boot)
# Function to calculate mean for bootstrapping
mean_fun <- function(data, indices) {
  return(mean(data[indices]))
}

# Bootstrap confidence intervals
bootstrap_ci <- function(data, n_boot = 5000, conf_level = 0.95) {
  boot_result <- boot(data = data, statistic = mean_fun, R = n_boot)
  ci <- boot.ci(boot_result, type = "perc", conf = conf_level)
  return(ci$percent[4:5])
}

# Apply bootstrap for BIOSA Method
summary_data_biosa <- data_final %>%
  group_by(Category) %>%
  summarise(
    mean_value_biosa = mean(Value.mean, na.rm = TRUE),
    ci_lower_biosa = bootstrap_ci(Value.mean, conf_level = 0.95)[1],
    ci_upper_biosa = bootstrap_ci(Value.mean, conf_level = 0.95)[2],
    n = n()
  )


summary_data_hemo <- data_final %>%
  group_by(Category) %>%
  summarise(
    mean_value_hemo = mean(Value, na.rm = TRUE),
    ci_lower_hemo = bootstrap_ci(Value, conf_level = 0.95)[1],
    ci_upper_hemo = bootstrap_ci(Value, conf_level = 0.95)[2],
    n = n()
  )


# Merge Biosa and Hemo data
summary_data <- left_join(summary_data_biosa, summary_data_hemo, by = "Category")

x_labels <- c("N=6\n(1)", "N=4\n(2)", "N=4\n(3)", "N=4\n(4)", "N=3\n(5)")

# Plot the data with error bars
final_plot <- ggplot(summary_data) +
  geom_point(aes(x = Category, y = mean_value_biosa, color = "BioSA"), size = 4) +
  geom_errorbar(aes(x = Category, ymin = ci_lower_biosa, ymax = ci_upper_biosa, color = "BioSA"), width = 0.2) +
  geom_point(aes(x = Category, y = mean_value_hemo, color = "Hemo"), size = 3) +
   geom_errorbar(aes(x = Category, ymin = ci_lower_hemo, ymax = ci_upper_hemo, color = "Hemo"), width = 0.2) +
  scale_color_manual(values = c("BioSA" = "red", "Hemo" = "blue"), labels = c("DePerio", "Control")) +
  labs(
    x = "Groups"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 12, family = "Times New Roman"),
    legend.position = "top", # Position the legend at the top
    legend.direction = "horizontal", # Arrange legend items horizontally
    legend.title = element_blank(),
panel.background = element_blank(), # Remove background color
plot.background = element_blank(), # Remove plot background color
panel.grid.major = element_blank(), # Set major grid lines to gray
panel.grid.minor = element_blank(), # Set minor grid lines to light gray
panel.border = element_rect(color = "black", fill = NA)

  ) +
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) + # Adjust y-axis labels
  scale_x_discrete(labels = x_labels) +
   labs(
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")")))
  ) +
  guides(fill = FALSE) # Remove legend for fill

final_plot

4.6 Figure 3.4

# Clear the workspace
rm(list=ls())

# Load necessary libraries
library(gridExtra)
library(magick)
library(grid)
library(openxlsx)
library(ggplot2)
library(tidyr)
library(dplyr)
library(extrafont)
library(RColorBrewer)
library(patchwork)
library(png)

# Load fonts
loadfonts(device = "win")

# Load data
data <- read.xlsx("Categorize_data2.xlsx", colNames = TRUE, sheet = 2)

# Reshape from wide to long format
df_long <- pivot_longer(data[c(-11,-12,-13),], cols = -1, names_to = "Group", values_to = "Value")

# Calculate group means and global mean (reference)
Biosa_means <- data[11, ]
Biosa_long <- pivot_longer(Biosa_means, cols = -1, names_to = "Group", values_to = "Biosa_mean")

SD <- data[13,]
SD_long = pivot_longer(SD, cols = -1, names_to = "Group", values_to = "B_STDEV")

# Extract and name means and standard deviations for Hemo and BIO_SA
Hemo_means <- data[12, ]
Hemo_long <- pivot_longer(Hemo_means, cols = -1, names_to = "Group", values_to = "Hemo_mean")

# Join means and standard deviations to df_long for use in plotting
df_long <- left_join(df_long, Biosa_long, by = "Group")
df_long <- left_join(df_long, Hemo_long, by = "Group")
df_long <- left_join(df_long, SD_long, by = "Group")

# Corrected plot code
df_long$Group <- factor(df_long$Group, levels = colnames(data))

# Convert the 'Images.x' column to a factor with numeric levels
df_long$Images.x <- as.factor(df_long$Images.x)
levels(df_long$Images.x) <- as.character(1:10)

# Calculate the overall minimum and maximum values for the y-axis
overall_min <- min(df_long$Value - df_long$B_STDEV, na.rm = TRUE)
overall_max <- max(df_long$Value + df_long$B_STDEV, na.rm = TRUE)

# Initialize an empty list to store the image grobs
image_grobs <- list()

# Loop through folders S1 to S21
for (i in 1:21) {
  # Define the folder
  image_folder <- paste0("s1-s21_images/S", i)
  
  # Define the file paths based on the folder number
  if (i < 10) {
    image_files <- c(file.path(image_folder, "1.jpg"),
                     file.path(image_folder, "2.jpg"),
                     file.path(image_folder, "3.jpg"))
  } else {
    image_files <- c(file.path(image_folder, "b.jpg"),
                     file.path(image_folder, "c.jpg"),
                     file.path(image_folder, "d.jpg"))
  }
  
  # Convert the images to grobs and add them to the list
  image_grobs[[i]] <- lapply(image_files, function(img) {
    img <- image_read(img)
    img <- image_scale(img, "x800")  # Scale to maintain aspect ratio and height of 400 pixels
    grid::rasterGrob(as.raster(img), width = unit(1, "npc"), height = unit(1, "npc"))
  })
}

# Define custom color palette
group_colors <- c(
  rep("#006400", 6), # Healthy_1 to Healthy_6
  rep("#66c2a4", 5), # Gingivitis_1 to Gingivitis_5
  rep("#fc8d62", 4), # Mild PD_6 to Mild PD_9
  rep("#e31a1c", 4), # Moderate PD_10 to Moderate PD_13
  rep("#8B0000", 2)  # Severe_14 to Severe_15
)

# Function to create individual plots for each group with consistent axis labels and repeated legends
create_group_plot <- function(group_name, color, index, arrow_image_vector) {
  df_group <- df_long %>% filter(Group == group_name)
  
  # Debugging output
 # message(paste("Creating plot for group:", group_name))
 # message(paste("Using arrow position:", arrow_image_vector[index]))
  
  # Initialize default vjust
  si_vjust <- 0.1  # Default vjust
  
  # Adjust vjust based on conditions
  if (index %in% c(8, 10, 12, 18)) {
    si_vjust <- 0.9
  } else if (index == 15) {
    si_vjust <- 0.2
  } 
  
  upper_bounder <- max(df_long %>% filter(Group == tail(levels(df_long$Group), 1)) %>% pull(Value))
  
  my_theme <- theme(
    axis.text = element_text(size = 40, family = "Times New Roman"),
    axis.title.y = element_text(size = 40, family = "Times New Roman", face="bold"),
    axis.title.x = element_text(size = 40 ,family = "Times New Roman", face="bold"),
    plot.margin = unit(c(2, 2, 2, 2), "cm"),  # Add more space around the plot
    legend.title = element_blank(), 
    legend.text = element_text(size = 35, family = "Times New Roman"),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Remove major grid lines
    panel.grid.minor = element_blank(), # Remove minor grid lines
    panel.border = element_rect(color = "black", fill = NA),
    strip.text.x = element_blank()  # Remove facet labels
  )
  
  # Add conditional theme adjustments for the legend
  if (index == 21) {
    my_theme <- my_theme + theme(
      legend.position = c(0.5, 0.95),
      legend.direction = "horizontal",
      legend.spacing.y = unit(0.02, 'cm')
    )
  } else {
    my_theme <- my_theme + theme(
      legend.position = c(0.35, 0.95),
      legend.direction = "vertical",
      legend.spacing.y = unit(0.02, 'cm')
    )
  }
  
  # Create the ggplot
  plot <- ggplot(df_group, aes(x = Images.x, y = Value, fill = Group)) +
    geom_col(position = "dodge", fill = color) +
    geom_errorbar(aes(ymin = Value - B_STDEV, ymax = Value + B_STDEV), position = position_dodge(0.9), width = 0.25) +
    geom_hline(aes(yintercept = Hemo_mean, color = "Hemo"), linetype = "dashed", linewidth = 3) +
    geom_hline(aes(yintercept = Biosa_mean, color = "BioSA"), linetype = "dashed", linewidth = 3) +
    
    # Add dashed line and arrow with the same color as the bar plot
    geom_segment(aes(x = arrow_image_vector[index], xend = arrow_image_vector[index], 
                     y = overall_min, yend = upper_bounder), 
                 linetype = "dashed", color = color, size = 2) +
    
    scale_fill_manual(values = group_colors, guide = 'none') +  # Remove the fill legend for Group
    scale_color_manual(name = "Means", values = c("Hemo" =  "navyblue", "BioSA" = "#B8860B"), labels = c("DePerio(mean)", "Control")) +
    labs(x = "Images", y = expression(bold(paste("oPMNs Load/10ml (" , " x 10"^5, ")")))) +
    
    # Adjust margins for more space
    theme_minimal() +
    my_theme +
    scale_y_continuous(labels = scales::comma_format(scale = 1 / 10^5, accuracy = 1), limits = c(overall_min, overall_max))
  
  return(plot)
}

# Create the combined plot using patchwork
combined_plot <- list()

group_names <- unique(df_long$Group)
arrow_image_vector <- c(4, 8, 6, 7, 7, 3, 4, 1, 7, 1, 9, 1, 7, 2, 10, 6, 7, 1, 7, 7, 4)

# Debugging: Check lengths of groups and arrow vector
#print(length(group_names))
#print(length(arrow_image_vector))

# Loop to create individual group plots
for (i in 1:length(group_names)) {
  message(paste("Index:", i, "Group Name:", group_names[i], "Arrow Position:", arrow_image_vector[i]))
  group_plot <- create_group_plot(group_names[i], group_colors[i], i, arrow_image_vector)
  image_plot <- wrap_elements(image_grobs[[i]][[1]]) / wrap_elements(image_grobs[[i]][[2]]) / wrap_elements(image_grobs[[i]][[3]])
  combined_plot[[i]] <- group_plot / image_plot
}

# Combine all plots into a grid layout with 7 columns
final_plot <- wrap_plots(combined_plot, ncol = 7) 

# Save the final plot
ggsave("final_plot_reducesize.png", plot = final_plot, width = 55, height = 65, dpi = 50 , units = "in", limitsize = FALSE)

knitr::include_graphics("final_plot_reducesize.png")

4.7 Figure 3.5

library(tidyr)
library(cowplot)


# Create the data frame correctly
data <- data.frame(
  Category = rep(c("Healthy_1", "Healthy_2", "Healthy_3", "Healthy_4", "Healthy_5", "Healthy_6",
                   "Gingivitis_1", "Gingivitis_2", "Gingivitis_3", "Gingivitis_4", "Gingivitis_5",
                   "Mild PD_6", "Mild PD_7", "Mild PD_8", "Mild PD_9",
                   "Moderate PD_10", "Moderate PD_11", "Moderate PD_12", "Moderate PD_13",
                   "Severe_14", "Severe_15"), each = 10),
  Pic = rep(paste("pic", 1:10, sep = "-"), 21),
  Value = c(
    # Healthy groups
    370600, 261600, 610400, 675800, 327000, 436000, 457800, 348800, 283400, 392400, # Healthy_1
    52320, 39240, 23980, 50140, 41420, 50140, 23980, 61040, 43600, 54500,            # Healthy_2
    610400, 305200, 457800, 327000, 283400, 501400, 632200, 457800, 632200, 305200,  # Healthy_3
    523200, 109000, 87200, 174400, 1220800, 1460600, 414200, 784800, 370600, 414200, # Healthy_4
    348800, 370600, 348800, 218000, 261600, 1765800, 1329800, 261600, 218000, 327000, # Healthy_5
    174400, 196200, 130800, 545000, 741200, 1002800, 981000, 872000, 675800, 719400,  # Healthy_6
    
    # Gingivitis groups
    959200, 1111800, 1220800, 1199000, 588600, 1002800, 763000, 436000, 479600, 1133600, # Gingivitis_1
    675800, 1155400, 501400, 1046400, 632200, 937400, 501400, 1046400, 479600, 1177200,  # Gingivitis_2
    937400, 1155400, 457800, 1111800, 1068200, 1090000, 959200, 1024600, 806600, 392400, # Gingivitis_3
    675800, 1111800, 1220800, 784800, 588600, 1090000, 959200, 1046400, 697600, 981000,  # Gingivitis_4
    1896600, 1809400, 1569600, 1678600, 2289000, 1090000, 893800, 1242600, 981000, 806600, # Gingivitis_5
    
    # Mild Periodontal Disease
    981000, 1460600, 1133600, 1002800, 2703200, 1133600, 1940200, 2158200, 3030200, 1308000, # Mild PD_6
    1199000, 1090000, 1068200, 1395200, 2964800, 2114600, 1220800, 2092800, 2223600, 3226400, # Mild PD_7
    981000, 1853000, 1177200, 1242600, 1111800, 806600, 3139200, 2092800, 5232000, 3488000,   # Mild PD_8
    2746800, 3444400, 2463400, 2354400, 3553400, 2223600, 2289000, 2986600, 2071000, 2964800, # Mild PD_9
    
    # Moderate and Severe Periodontal Disease
    3444400, 3531600, 5253800, 5275600, 5275600, 1853000, 2354400, 2441600, 2550600, 2768600, # Moderate PD_10
    2463400, 3270000, 2768600, 3182800, 5886000, 2768600, 3182800, 4883200, 5035800, 3444400, # Moderate PD_11
    3226400, 3967600, 4490800, 4403600, 5188400, 5057600, 2812200, 3967600, 5798800, 4403600, # Moderate PD_12
    5035800, 3444400, 2267200, 9221400, 7259400, 3553400, 2223600, 2768600, 7128600, 9265000, # Moderate PD_13
    4883200, 8305800, 4665200, 9265000, 5057600, 5362800, 5886000, 10616600, 9526600, 5144800, # Severe_14
    8305800, 9286800, 8807200, 9831800, 8698200, 8022400, 7542800, 9243200, 9395800, 9984400  # Severe_15
  )
)

biosa_mean <- c(416380, 44036, 451260, 555900, 545000, 603860, 889440, 815320, 900340, 915600, 1425720, 1685140, 1859540, 2112420, 2709740, 3474920, 3688560, 4331660, 5216740, 6871360, 8911840)
biosa_mean <- rep(biosa_mean, each=10)
hemo_mean <- c(470000, 510000, 580000, 550000, 600000, 650000, 900000, 920000, 950000, 950000, 1500000, 2000000, 2000000, 3000000, 3100000, 3500000, 3900000, 4200000, 5700000, 7500000, 9000000)
hemo_mean <- rep(hemo_mean, each=10)

data$biosa_mean <- biosa_mean
data$hemo_mean <- hemo_mean

# Aggregate the data by Category to get the mean values
aggregated_data <- data %>%
  group_by(Category) %>%
  summarise(mean_biosa_mean = mean(biosa_mean),
            mean_hemo_mean = mean(hemo_mean))

# Linear model using mean values from `aggregated_data`
lm_model <- lm(mean_hemo_mean ~ mean_biosa_mean, data = aggregated_data)

# Extract the coefficients for the equation
intercept <- coef(lm_model)[1]
slope <- coef(lm_model)[2]
equation <- paste("y =", round(intercept, 2), "+", round(slope, 2), "x")

# Calculate statistics
summary_lm <- summary(lm_model)
r_squared <- summary_lm$r.squared
rmse <- sqrt(mean(lm_model$residuals^2))
p_value_slope <- summary_lm$coefficients[2, 4]
p_value_intercept <- summary_lm$coefficients[1, 4]
residual_se <- summary_lm$sigma
f_statistic <- summary_lm$fstatistic[1]
f_statistic_p_value <- pf(summary_lm$fstatistic[1], summary_lm$fstatistic[2], summary_lm$fstatistic[3], lower.tail = FALSE)

mean_hemo_mean <- mean(aggregated_data$mean_hemo_mean)

# Calculate RMSE in percentage (relative to the mean of the hemo_mean)
rmse_percentage <- (rmse / mean_hemo_mean) * 100

# Combine statistics into a string
stats_text <- paste0(
  "R² = ", round(r_squared, 2), "\n",
  "RMSE (%) = ", round(rmse_percentage, 2), "%\n",  # RMSE in percentage
  "p-value (Slope) = ", format(p_value_slope, digits = 1), "\n",
  "p-value (Intercept) = ", format(p_value_intercept, digits = 1), "\n",
  "Residual SE = ", round(residual_se, 1), "\n",
  "F-statistic = ", round(f_statistic, 1), ", p-value = ", format(f_statistic_p_value, digits = 4), "\n"
)

# Print the stats text
cat(stats_text)
## R² = 0.99
## RMSE (%) = 9.45%
## p-value (Slope) = 2e-20
## p-value (Intercept) = 0.09
## Residual SE = 248162.5
## F-statistic = 1895.7, p-value = 1.687e-20
# Combine equation, R², and RMSE into a single label with line breaks
equation_label <- paste(
  equation, "\n",  # Equation on the first line
  "R² = ", round(r_squared, 2), "\n",  # R² on the second line
  "RMSE (%) = ", round(rmse_percentage, 2), "%\n",  # RMSE in percentage
  sep = ""
)


# Create the plot with custom legend for intersection points (navy) and values points (dark gray)
p8 = ggplot() +
  # Plot individual points from the full dataset in dark gray, and add them to the legend
  geom_jitter(data = data, aes(x = hemo_mean, y = Value, color = "DePerio measures per image"), width = 0.05, height = 0.01, alpha = 0.5,size=4) +  # All points in dark gray
  # Plot mean points (intersection points) in navy blue, and add them to the legend
  geom_point(data = aggregated_data, aes(x = mean_hemo_mean, y = mean_biosa_mean, color = "DePerio(mean)_Control"), size = 5) +  # Mean points
  # Plot the linear model based on the intersection points
  geom_smooth(data = aggregated_data, aes(x = mean_hemo_mean, y = mean_biosa_mean), method = "lm", se = FALSE, color = "black", linetype = "solid", linewidth = 1) +  # Linear model fitting
  # Add the "DePerio VS Control" text above the equation
  annotate("text", x =min(aggregated_data$mean_hemo_mean)*5
           , y = max(aggregated_data$mean_biosa_mean)*1.06,  # Adjusted position above the equation
           label = "DePerio_Control Linearity", color = "black", hjust = 0.5, size = 6,
           family = "Times New Roman", fontface = "bold") +
  
  # Add the equation, R², and RMSE annotation inside the frame, positioned below the title
  annotate("text", x = min(aggregated_data$mean_hemo_mean)*5
           , y = max(aggregated_data$mean_biosa_mean)*0.8,  # Adjusted y-position
           label = equation_label, color = "black", hjust = 0.5, size = 5,
           family = "Times New Roman", fontface = "bold") +
  # Adjust x and y axis scales
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5),
                     breaks = seq(0, max(data$biosa_mean, na.rm = TRUE), length.out = 10)) +  # More ticks on y-axis
  scale_x_continuous(labels = function(x) sprintf("%.1f", x / 1e5),
                     breaks = seq(0, max(data$hemo_mean, na.rm = TRUE), length.out = 10)) +  # More ticks on x-axis
  
  # Labels and title
  labs(
    x = expression(bold(paste("Number of oPMNs/10ml_Control (" , " x 10"^5, ")"))),
    y = expression(bold(paste("Number of oPMNs/10ml_DePerio (", " x 10"^5, ")")))
  ) +
  
  # Manual color scale for legend entries
  scale_color_manual(name = "Legend", 
                     values = c("DePerio(mean)_Control" = "navy", "DePerio measures per image" = "darkgray")) +
  
  # Theme adjustments (Set the plot and background to white)
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman", face = "bold"),
    legend.text = element_text(size = 11, family = "Times New Roman"),
    legend.position = c(0.75, 0.13), 
    legend.direction = "vertical",
    legend.spacing.y = unit(0.02, 'cm'),
    legend.key.size = unit(0.8, 'lines'),
    legend.title =  element_blank(),
    panel.background = element_blank(),
    plot.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA)
  )

#print(p8)
##############################
data <- read.xlsx("Categorize_data2.xlsx", colNames = TRUE)

# Multiply the second row by 2.18
data[2, 2:ncol(data)] <- data[2, 2:ncol(data)] * 2.18 * 10000

# Pivot the data to a long format
data_long <- pivot_longer(data,
                          cols = -Info,
                          names_to = "Category",
                          values_to = "Value")

# Remove leading and trailing whitespace from Info column
data_long$Info <- trimws(data_long$Info)

# Separate data into measurements and standard deviations
measurements <- data_long %>%
  filter(Info == "Biosa") %>%
  select(-Info)

stdevs <- data_long %>%
  filter(Info == "B-STDEV") %>%
  select(-Info)

measurements2 <- data_long %>%
  filter(Info == "Hemo") %>%
  select(-Info)

# Merge the datasets
data_final <- left_join(measurements, stdevs, by = "Category", suffix = c(".mean", ".stdev"))
data_final <- left_join(data_final, measurements2, by = "Category")

# Create a simpler category label if necessary
data_final$Category <- gsub("_\\d+", "", data_final$Category)
data_final$Category <- gsub("\\.", " ", data_final$Category)

# Calculate the count of observations for each category
data_final <- data_final %>%
  group_by(Category) %>%
  mutate(n = n()) %>%
  ungroup()
data_final$Category <- factor(data_final$Category, levels = c(
  "Healthy", "Gingivitis", "Mild Periodontitis", "Moderate Periodontitis", "Severe Periodontitis"
))

custom_colors <- c("Healthy" = "#228B22", # Dark green
                   "Gingivitis" = "#66c2a4", # Light green
                   "Mild Periodontitis" = "#fc8d62", # Light orange
                   "Moderate Periodontitis" = "#e31a1c", # Dark orange
                   "Severe Periodontitis" = "#8B0000") # Dark red

x_labels <- c("N=6\n(1)", "N=5\n(2)", "N=4\n(3)", "N=4\n(4)", "N=2\n(5)")

data_final <- data_final %>%
  group_by(Category) %>%
  mutate(mean = mean(Value.mean, na.rm = TRUE)) %>%
  mutate(mean_stdev = mean(Value.stdev, na.rm = TRUE)) %>%
  ungroup()



p <- ggplot(data_final, aes(x = Category, y = Value.mean)) +
  geom_boxplot(fill = "white", color = "black") +
  geom_jitter(aes(color = Category), # Color jitter points by Category
              position = position_jitter(0.2), alpha = 2, size = 3, show.legend = FALSE) +
  geom_errorbar(aes(ymin = mean - mean_stdev, ymax = mean + mean_stdev), 
                width = 0.2) +
  scale_fill_manual(values = custom_colors) +
  stat_summary(aes(color = "Mean"), fun = mean, geom = "point", shape = 15, size = 2) +  # Add mean marker
  stat_summary(aes(color = "Max"), fun = max, geom = "point", shape = 17, size = 2) +  # Add max marker
  stat_summary(aes(color = "Min"), fun = min, geom = "point", shape = 16, size = 2) +  # Add min marker
  scale_color_manual(name = "Statistics labels", values = c(custom_colors,"Mean" = "black", "Max" = "black", "Min" = "black"),
                     breaks = c("Mean", "Max", "Min")) +
  theme_minimal()

p <- p + stat_compare_means(comparisons = list(
  c("Healthy", "Gingivitis"), c("Gingivitis", "Mild Periodontitis"),
  c("Mild Periodontitis", "Moderate Periodontitis"), c("Moderate Periodontitis", "Severe Periodontitis"),
  c("Healthy", "Mild Periodontitis"), c("Healthy", "Moderate Periodontitis"),
  c("Healthy", "Severe Periodontitis"), c("Gingivitis", "Moderate Periodontitis"),
  c("Gingivitis", "Severe Periodontitis")
), method = "wilcox.test", method.args = list(exact = TRUE), size =3.5, family = "Times New Roman")

p9 <- p + labs(
  x = "Groups"
) +
  theme_minimal() +
  theme(
    axis.text= element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 11, family = "Times New Roman"),
    legend.position = c(0.9, 0.12),  # Position legend in bottom right inside plot
    legend.direction = "vertical",  # Arrange legend items vertically
    legend.spacing.y = unit(0.02, 'cm'),  # Further reduce space between legend lines
    legend.key.size = unit(0.8, 'lines'),  # Reduce the size of the legend key
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Set major grid lines to gray
    panel.grid.minor = element_blank(),  # Set minor grid lines to light gray
    panel.border = element_rect(color = "black", fill = NA),
    #aspect.ratio = 1.3/1
  )  +
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5) # Adjust y-axis labels
  ) +
  scale_x_discrete(labels = x_labels) +
  labs(
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")")))
  ) +
  guides(fill = FALSE)  # Remove legend for fill
#print(p9)
####################################
library(boot)
# Load the data
data <- read.xlsx("Categorize_data2.xlsx", colNames = TRUE)

# Pivot the data to a long format
data_long <- pivot_longer(data, cols = -Info, names_to = "Category", values_to = "Value")

# Separate data into measurements and standard deviations
measurements <- data_long %>%
  filter(Info == "Biosa") %>%
  select(-Info)

stdevs <- data_long %>%
  filter(Info == "B-STDEV") %>%
  select(-Info)

measurements2 <- data_long %>%
  filter(Info == "Hemo") %>%
  select(-Info)

# Merge the datasets
data_final <- left_join(measurements, stdevs, by = "Category", suffix = c(".mean", ".stdev"))
data_final <- left_join(data_final, measurements2, by = "Category")

# Create a simpler category label if necessary
data_final$Category <- gsub("_\\d+", "", data_final$Category)
data_final$Category <- gsub("\\.", " ", data_final$Category)

# Order the categories
data_final$Category <- factor(data_final$Category, levels = c("Healthy", "Gingivitis", "Mild Periodontitis", "Moderate Periodontitis", "Severe Periodontitis"))

# Function to calculate mean for bootstrapping
mean_fun <- function(data, indices) {
  return(mean(data[indices]))
}

# Bootstrap confidence intervals
bootstrap_ci <- function(data, n_boot = 5000, conf_level = 0.95) {
  boot_result <- boot(data = data, statistic = mean_fun, R = n_boot)
  ci <- boot.ci(boot_result, type = "perc", conf = conf_level)
  return(ci$percent[4:5])
}

# Apply bootstrap for BIOSA Method
summary_data_biosa <- data_final %>%
  group_by(Category) %>%
  summarise(
    mean_value_biosa = mean(Value.mean, na.rm = TRUE),
    ci_lower_biosa = bootstrap_ci(Value.mean, conf_level = 0.95)[1],
    ci_upper_biosa = bootstrap_ci(Value.mean, conf_level = 0.95)[2],
    n = n()
  )

# Calculate the mean for each category for Hemo
summary_data_hemo <- data_final %>%
  group_by(Category) %>%
  summarise(
    mean_value_hemo = mean(Value, na.rm = TRUE),
    ci_lower_hemo = bootstrap_ci(Value, conf_level = 0.95)[1],
    ci_upper_hemo = bootstrap_ci(Value, conf_level = 0.95)[2],
    n = n()
  )

# Merge Biosa and Hemo data
summary_data <- left_join(summary_data_biosa, summary_data_hemo, by = "Category")


# Create custom x-axis labels
x_labels <- c("N=6\n(1)", "N=5\n(2)", "N=4\n(3)", "N=4\n(4)", "N=2\n(5)")

# Plot the data with error bars
p10 <- ggplot(summary_data) +
  geom_point(aes(x = Category, y = mean_value_biosa, color = "DePerio(mean)"), shape = 19 , size=3) +
  geom_point(aes(x = Category, y = mean_value_hemo, color = "Conrol"), shape = 17, size=3) +
  geom_errorbar(aes(x = Category, ymin = ci_lower_biosa, ymax = ci_upper_biosa, color = "DePerio"), width = 0.2) +
  geom_errorbar(aes(x = Category, ymin = ci_lower_hemo, ymax = ci_upper_hemo, color = "Conrol"), width = 0.2) +
  scale_color_manual(values = c("DePerio(mean)" = "navy",
                                "Conrol" = "#B8860B"))+
  annotate("text", x = 0.6, y = 86*100000, label = "95% C.I",
           color = "black", hjust = 0, vjust = 0, size = 4,
           family = "Times New Roman",fontface = "bold") +
  labs(
    x = "Groups"
  ) +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 12, family = "Times New Roman"),
    axis.title = element_text(size = 12, family = "Times New Roman",face="bold"),
    legend.text = element_text(size = 11, family = "Times New Roman"),
    legend.position = c(0.1, 0.83),  # Position legend in bottom right inside plot
    legend.direction = "vertical",  # Arrange legend items vertically
    legend.spacing.y = unit(0.02, 'cm'),  # Further reduce space between legend lines
    legend.key.size = unit(0.8, 'lines'),  # Reduce the size of the legend key
    legend.title = element_blank(),
    panel.background = element_blank(), # Remove background color
    plot.background = element_blank(), # Remove plot background color
    panel.grid.major = element_blank(), # Set major grid lines to gray
    panel.grid.minor = element_blank(), # Set minor grid lines to light gray
    panel.border = element_rect(color = "black", fill = NA)
    # aspect.ratio = 2/1
    
  ) +
  scale_y_continuous(labels = function(x) sprintf("%.1f", x / 1e5)) + # Adjust y-axis labels
  scale_x_discrete(labels = x_labels) +
  labs(
    y = expression(bold(paste("Number of oPMNs/10ml (" , " x 10"^5, ")")))
  ) +
  guides(fill = FALSE) # Remove legend for fill

#p10

#############################
library(gridExtra)
library(grid)
library(gtable)
library(cowplot)


# Ensure font consistency in all plots
font_size <- 10
font_family <- "Times New Roman"

# Apply consistent theme with bold axis titles to all plots
p8 <- p8 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 30)),
                 plot.margin = margin(t = 8, r = 6, b = 40, l = 6)
)


p9 <- p9 + theme(text = element_text(size = font_size, family = font_family),
                 axis.title.x = element_text(face = "bold"),
                 axis.title.y = element_text(face = "bold", margin = margin(r = 30)),
                 plot.margin = margin(t = 8, r = 1, b = 8, l = 6)
)

p10 <- p10 + theme(text = element_text(size = font_size, family = font_family),
                   axis.title.x = element_text(face = "bold"),
                   axis.title.y = element_text(face = "bold", margin = margin(r = 30)),
                   plot.margin = margin(t = 8, r = 6, b = 8, l = 1)
)

# Align the width of p10 to match p8 using plot_grid
p8 = plot_grid(p8, ncol = 1)
p10 = plot_grid(p10, ncol = 1) # Ensure p10 is in a single column layout to match p8's width

# Create combined plot of p9 and p10
combined_p9_p10 <- plot_grid(p9, p10, ncol = 2, rel_widths = c(2, 1), align = "v", axis = "tb")

# Arrange the plots in 2 rows and 2 columns
q <- plot_grid(
  p8 ,
  combined_p9_p10,
  nrow = 2, align = "v", axis = "tb"
)

# Save the final arranged plot to a file
ggsave("arrange3.png", plot = q, width = 10, height = 11)
knitr::include_graphics("arrange3.png")